1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-23 03:02:36 +01:00
llvm-mirror/lib/Support/LockFileManager.cpp

352 lines
11 KiB
C++
Raw Normal View History

//===--- LockFileManager.cpp - File-level Locking Utility------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "llvm/Support/LockFileManager.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Process.h"
#include "llvm/Support/Signals.h"
#include "llvm/Support/raw_ostream.h"
#include <cerrno>
Improve module.pcm lock file performance on machines with high core counts Summary: When building a large Xcode project with multiple module dependencies, and mixed Objective-C & Swift, I observed a large number of clang processes stalling at zero CPU for 30+ seconds throughout the build. This was especially prevalent on my 18-core iMac Pro. After some sampling, the major cause appears to be the lock file implementation for precompiled modules in the module cache. When the lock is heavily contended by multiple clang processes, the exponential backoff runs in lockstep, with some of the processes sleeping for 30+ seconds in order to acquire the file lock. In the attached patch, I implemented a more aggressive polling mechanism that limits the sleep interval to a max of 500ms, and randomizes the wait time. I preserved a limited form of exponential backoff. I also updated the code to use cross-platform timing, thread sleep, and random number capabilities available in C++11. On iMac Pro (2.3 GHz Intel Xeon W, 18 core): Xcode 11.1 bundled clang: 502.2 seconds (average of 5 runs) Custom clang build with LockFileManager patch applied: 276.6 seconds (average of 5 runs) This is a 1.82x speedup for this use case. On MacBook Pro (4 core 3.1GHz Intel i7): Xcode 11.1 bundled clang: 539.4 seconds (average of 2 runs) Custom clang build with LockFileManager patch applied: 509.5 seconds (average of 2 runs) As expected, machines with fewer cores benefit less from this change. 
``` Call graph: 2992 Thread_393602 DispatchQueue_1: com.apple.main-thread (serial) 2992 start (in libdyld.dylib) + 1 [0x7fff6a1683d5] 2992 main (in clang) + 297 [0x1097a1059] 2992 driver_main(int, char const**) (in clang) + 2803 [0x1097a5513] 2992 cc1_main(llvm::ArrayRef<char const*>, char const*, void*) (in clang) + 1608 [0x1097a7cc8] 2992 clang::ExecuteCompilerInvocation(clang::CompilerInstance*) (in clang) + 3299 [0x1097dace3] 2992 clang::CompilerInstance::ExecuteAction(clang::FrontendAction&) (in clang) + 509 [0x1097dcc1d] 2992 clang::FrontendAction::Execute() (in clang) + 42 [0x109818b3a] 2992 clang::ParseAST(clang::Sema&, bool, bool) (in clang) + 185 [0x10981b369] 2992 clang::Parser::ParseFirstTopLevelDecl(clang::OpaquePtr<clang::DeclGroupRef>&) (in clang) + 37 [0x10983e9b5] 2992 clang::Parser::ParseTopLevelDecl(clang::OpaquePtr<clang::DeclGroupRef>&) (in clang) + 141 [0x10983ecfd] 2992 clang::Parser::ParseExternalDeclaration(clang::Parser::ParsedAttributesWithRange&, clang::ParsingDeclSpec*) (in clang) + 695 [0x10983f3b7] 2992 clang::Parser::ParseObjCAtDirectives(clang::Parser::ParsedAttributesWithRange&) (in clang) + 637 [0x10a9be9bd] 2992 clang::Parser::ParseModuleImport(clang::SourceLocation) (in clang) + 170 [0x10c4841ba] 2992 clang::Parser::ParseModuleName(clang::SourceLocation, llvm::SmallVectorImpl<std::__1::pair<clang::IdentifierInfo*, clang::SourceLocation> >&, bool) (in clang) + 503 [0x10c485267] 2992 clang::Preprocessor::Lex(clang::Token&) (in clang) + 316 [0x1098285cc] 2992 clang::Preprocessor::LexAfterModuleImport(clang::Token&) (in clang) + 690 [0x10cc7af62] 2992 clang::CompilerInstance::loadModule(clang::SourceLocation, llvm::ArrayRef<std::__1::pair<clang::IdentifierInfo*, clang::SourceLocation> >, clang::Module::NameVisibilityKind, bool) (in clang) + 7989 [0x10bba6535] 2992 compileAndLoadModule(clang::CompilerInstance&, clang::SourceLocation, clang::SourceLocation, clang::Module*, llvm::StringRef) (in clang) + 296 [0x10bba8318] 2992 
llvm::LockFileManager::waitForUnlock() (in clang) + 91 [0x10b6953ab] 2992 nanosleep (in libsystem_c.dylib) + 199 [0x7fff6a22c914] 2992 __semwait_signal (in libsystem_kernel.dylib) + 10 [0x7fff6a2a0f32] ``` Differential Revision: https://reviews.llvm.org/D69575
2020-03-23 22:16:55 +01:00
#include <chrono>
#include <ctime>
#include <memory>
Improve module.pcm lock file performance on machines with high core counts Summary: When building a large Xcode project with multiple module dependencies, and mixed Objective-C & Swift, I observed a large number of clang processes stalling at zero CPU for 30+ seconds throughout the build. This was especially prevalent on my 18-core iMac Pro. After some sampling, the major cause appears to be the lock file implementation for precompiled modules in the module cache. When the lock is heavily contended by multiple clang processes, the exponential backoff runs in lockstep, with some of the processes sleeping for 30+ seconds in order to acquire the file lock. In the attached patch, I implemented a more aggressive polling mechanism that limits the sleep interval to a max of 500ms, and randomizes the wait time. I preserved a limited form of exponential backoff. I also updated the code to use cross-platform timing, thread sleep, and random number capabilities available in C++11. On iMac Pro (2.3 GHz Intel Xeon W, 18 core): Xcode 11.1 bundled clang: 502.2 seconds (average of 5 runs) Custom clang build with LockFileManager patch applied: 276.6 seconds (average of 5 runs) This is a 1.82x speedup for this use case. On MacBook Pro (4 core 3.1GHz Intel i7): Xcode 11.1 bundled clang: 539.4 seconds (average of 2 runs) Custom clang build with LockFileManager patch applied: 509.5 seconds (average of 2 runs) As expected, machines with fewer cores benefit less from this change. 
``` Call graph: 2992 Thread_393602 DispatchQueue_1: com.apple.main-thread (serial) 2992 start (in libdyld.dylib) + 1 [0x7fff6a1683d5] 2992 main (in clang) + 297 [0x1097a1059] 2992 driver_main(int, char const**) (in clang) + 2803 [0x1097a5513] 2992 cc1_main(llvm::ArrayRef<char const*>, char const*, void*) (in clang) + 1608 [0x1097a7cc8] 2992 clang::ExecuteCompilerInvocation(clang::CompilerInstance*) (in clang) + 3299 [0x1097dace3] 2992 clang::CompilerInstance::ExecuteAction(clang::FrontendAction&) (in clang) + 509 [0x1097dcc1d] 2992 clang::FrontendAction::Execute() (in clang) + 42 [0x109818b3a] 2992 clang::ParseAST(clang::Sema&, bool, bool) (in clang) + 185 [0x10981b369] 2992 clang::Parser::ParseFirstTopLevelDecl(clang::OpaquePtr<clang::DeclGroupRef>&) (in clang) + 37 [0x10983e9b5] 2992 clang::Parser::ParseTopLevelDecl(clang::OpaquePtr<clang::DeclGroupRef>&) (in clang) + 141 [0x10983ecfd] 2992 clang::Parser::ParseExternalDeclaration(clang::Parser::ParsedAttributesWithRange&, clang::ParsingDeclSpec*) (in clang) + 695 [0x10983f3b7] 2992 clang::Parser::ParseObjCAtDirectives(clang::Parser::ParsedAttributesWithRange&) (in clang) + 637 [0x10a9be9bd] 2992 clang::Parser::ParseModuleImport(clang::SourceLocation) (in clang) + 170 [0x10c4841ba] 2992 clang::Parser::ParseModuleName(clang::SourceLocation, llvm::SmallVectorImpl<std::__1::pair<clang::IdentifierInfo*, clang::SourceLocation> >&, bool) (in clang) + 503 [0x10c485267] 2992 clang::Preprocessor::Lex(clang::Token&) (in clang) + 316 [0x1098285cc] 2992 clang::Preprocessor::LexAfterModuleImport(clang::Token&) (in clang) + 690 [0x10cc7af62] 2992 clang::CompilerInstance::loadModule(clang::SourceLocation, llvm::ArrayRef<std::__1::pair<clang::IdentifierInfo*, clang::SourceLocation> >, clang::Module::NameVisibilityKind, bool) (in clang) + 7989 [0x10bba6535] 2992 compileAndLoadModule(clang::CompilerInstance&, clang::SourceLocation, clang::SourceLocation, clang::Module*, llvm::StringRef) (in clang) + 296 [0x10bba8318] 2992 
llvm::LockFileManager::waitForUnlock() (in clang) + 91 [0x10b6953ab] 2992 nanosleep (in libsystem_c.dylib) + 199 [0x7fff6a22c914] 2992 __semwait_signal (in libsystem_kernel.dylib) + 10 [0x7fff6a2a0f32] ``` Differential Revision: https://reviews.llvm.org/D69575
2020-03-23 22:16:55 +01:00
#include <random>
#include <sys/stat.h>
#include <sys/types.h>
#include <system_error>
Improve module.pcm lock file performance on machines with high core counts Summary: When building a large Xcode project with multiple module dependencies, and mixed Objective-C & Swift, I observed a large number of clang processes stalling at zero CPU for 30+ seconds throughout the build. This was especially prevalent on my 18-core iMac Pro. After some sampling, the major cause appears to be the lock file implementation for precompiled modules in the module cache. When the lock is heavily contended by multiple clang processes, the exponential backoff runs in lockstep, with some of the processes sleeping for 30+ seconds in order to acquire the file lock. In the attached patch, I implemented a more aggressive polling mechanism that limits the sleep interval to a max of 500ms, and randomizes the wait time. I preserved a limited form of exponential backoff. I also updated the code to use cross-platform timing, thread sleep, and random number capabilities available in C++11. On iMac Pro (2.3 GHz Intel Xeon W, 18 core): Xcode 11.1 bundled clang: 502.2 seconds (average of 5 runs) Custom clang build with LockFileManager patch applied: 276.6 seconds (average of 5 runs) This is a 1.82x speedup for this use case. On MacBook Pro (4 core 3.1GHz Intel i7): Xcode 11.1 bundled clang: 539.4 seconds (average of 2 runs) Custom clang build with LockFileManager patch applied: 509.5 seconds (average of 2 runs) As expected, machines with fewer cores benefit less from this change. 
``` Call graph: 2992 Thread_393602 DispatchQueue_1: com.apple.main-thread (serial) 2992 start (in libdyld.dylib) + 1 [0x7fff6a1683d5] 2992 main (in clang) + 297 [0x1097a1059] 2992 driver_main(int, char const**) (in clang) + 2803 [0x1097a5513] 2992 cc1_main(llvm::ArrayRef<char const*>, char const*, void*) (in clang) + 1608 [0x1097a7cc8] 2992 clang::ExecuteCompilerInvocation(clang::CompilerInstance*) (in clang) + 3299 [0x1097dace3] 2992 clang::CompilerInstance::ExecuteAction(clang::FrontendAction&) (in clang) + 509 [0x1097dcc1d] 2992 clang::FrontendAction::Execute() (in clang) + 42 [0x109818b3a] 2992 clang::ParseAST(clang::Sema&, bool, bool) (in clang) + 185 [0x10981b369] 2992 clang::Parser::ParseFirstTopLevelDecl(clang::OpaquePtr<clang::DeclGroupRef>&) (in clang) + 37 [0x10983e9b5] 2992 clang::Parser::ParseTopLevelDecl(clang::OpaquePtr<clang::DeclGroupRef>&) (in clang) + 141 [0x10983ecfd] 2992 clang::Parser::ParseExternalDeclaration(clang::Parser::ParsedAttributesWithRange&, clang::ParsingDeclSpec*) (in clang) + 695 [0x10983f3b7] 2992 clang::Parser::ParseObjCAtDirectives(clang::Parser::ParsedAttributesWithRange&) (in clang) + 637 [0x10a9be9bd] 2992 clang::Parser::ParseModuleImport(clang::SourceLocation) (in clang) + 170 [0x10c4841ba] 2992 clang::Parser::ParseModuleName(clang::SourceLocation, llvm::SmallVectorImpl<std::__1::pair<clang::IdentifierInfo*, clang::SourceLocation> >&, bool) (in clang) + 503 [0x10c485267] 2992 clang::Preprocessor::Lex(clang::Token&) (in clang) + 316 [0x1098285cc] 2992 clang::Preprocessor::LexAfterModuleImport(clang::Token&) (in clang) + 690 [0x10cc7af62] 2992 clang::CompilerInstance::loadModule(clang::SourceLocation, llvm::ArrayRef<std::__1::pair<clang::IdentifierInfo*, clang::SourceLocation> >, clang::Module::NameVisibilityKind, bool) (in clang) + 7989 [0x10bba6535] 2992 compileAndLoadModule(clang::CompilerInstance&, clang::SourceLocation, clang::SourceLocation, clang::Module*, llvm::StringRef) (in clang) + 296 [0x10bba8318] 2992 
llvm::LockFileManager::waitForUnlock() (in clang) + 91 [0x10b6953ab] 2992 nanosleep (in libsystem_c.dylib) + 199 [0x7fff6a22c914] 2992 __semwait_signal (in libsystem_kernel.dylib) + 10 [0x7fff6a2a0f32] ``` Differential Revision: https://reviews.llvm.org/D69575
2020-03-23 22:16:55 +01:00
#include <thread>
#include <tuple>
Improve module.pcm lock file performance on machines with high core counts Summary: When building a large Xcode project with multiple module dependencies, and mixed Objective-C & Swift, I observed a large number of clang processes stalling at zero CPU for 30+ seconds throughout the build. This was especially prevalent on my 18-core iMac Pro. After some sampling, the major cause appears to be the lock file implementation for precompiled modules in the module cache. When the lock is heavily contended by multiple clang processes, the exponential backoff runs in lockstep, with some of the processes sleeping for 30+ seconds in order to acquire the file lock. In the attached patch, I implemented a more aggressive polling mechanism that limits the sleep interval to a max of 500ms, and randomizes the wait time. I preserved a limited form of exponential backoff. I also updated the code to use cross-platform timing, thread sleep, and random number capabilities available in C++11. On iMac Pro (2.3 GHz Intel Xeon W, 18 core): Xcode 11.1 bundled clang: 502.2 seconds (average of 5 runs) Custom clang build with LockFileManager patch applied: 276.6 seconds (average of 5 runs) This is a 1.82x speedup for this use case. On MacBook Pro (4 core 3.1GHz Intel i7): Xcode 11.1 bundled clang: 539.4 seconds (average of 2 runs) Custom clang build with LockFileManager patch applied: 509.5 seconds (average of 2 runs) As expected, machines with fewer cores benefit less from this change. 
``` Call graph: 2992 Thread_393602 DispatchQueue_1: com.apple.main-thread (serial) 2992 start (in libdyld.dylib) + 1 [0x7fff6a1683d5] 2992 main (in clang) + 297 [0x1097a1059] 2992 driver_main(int, char const**) (in clang) + 2803 [0x1097a5513] 2992 cc1_main(llvm::ArrayRef<char const*>, char const*, void*) (in clang) + 1608 [0x1097a7cc8] 2992 clang::ExecuteCompilerInvocation(clang::CompilerInstance*) (in clang) + 3299 [0x1097dace3] 2992 clang::CompilerInstance::ExecuteAction(clang::FrontendAction&) (in clang) + 509 [0x1097dcc1d] 2992 clang::FrontendAction::Execute() (in clang) + 42 [0x109818b3a] 2992 clang::ParseAST(clang::Sema&, bool, bool) (in clang) + 185 [0x10981b369] 2992 clang::Parser::ParseFirstTopLevelDecl(clang::OpaquePtr<clang::DeclGroupRef>&) (in clang) + 37 [0x10983e9b5] 2992 clang::Parser::ParseTopLevelDecl(clang::OpaquePtr<clang::DeclGroupRef>&) (in clang) + 141 [0x10983ecfd] 2992 clang::Parser::ParseExternalDeclaration(clang::Parser::ParsedAttributesWithRange&, clang::ParsingDeclSpec*) (in clang) + 695 [0x10983f3b7] 2992 clang::Parser::ParseObjCAtDirectives(clang::Parser::ParsedAttributesWithRange&) (in clang) + 637 [0x10a9be9bd] 2992 clang::Parser::ParseModuleImport(clang::SourceLocation) (in clang) + 170 [0x10c4841ba] 2992 clang::Parser::ParseModuleName(clang::SourceLocation, llvm::SmallVectorImpl<std::__1::pair<clang::IdentifierInfo*, clang::SourceLocation> >&, bool) (in clang) + 503 [0x10c485267] 2992 clang::Preprocessor::Lex(clang::Token&) (in clang) + 316 [0x1098285cc] 2992 clang::Preprocessor::LexAfterModuleImport(clang::Token&) (in clang) + 690 [0x10cc7af62] 2992 clang::CompilerInstance::loadModule(clang::SourceLocation, llvm::ArrayRef<std::__1::pair<clang::IdentifierInfo*, clang::SourceLocation> >, clang::Module::NameVisibilityKind, bool) (in clang) + 7989 [0x10bba6535] 2992 compileAndLoadModule(clang::CompilerInstance&, clang::SourceLocation, clang::SourceLocation, clang::Module*, llvm::StringRef) (in clang) + 296 [0x10bba8318] 2992 
llvm::LockFileManager::waitForUnlock() (in clang) + 91 [0x10b6953ab] 2992 nanosleep (in libsystem_c.dylib) + 199 [0x7fff6a22c914] 2992 __semwait_signal (in libsystem_kernel.dylib) + 10 [0x7fff6a2a0f32] ``` Differential Revision: https://reviews.llvm.org/D69575
2020-03-23 22:16:55 +01:00
#ifdef _WIN32
#include <windows.h>
#endif
#if LLVM_ON_UNIX
#include <unistd.h>
#endif
#if defined(__APPLE__) && defined(__MAC_OS_X_VERSION_MIN_REQUIRED) && (__MAC_OS_X_VERSION_MIN_REQUIRED > 1050)
#define USE_OSX_GETHOSTUUID 1
#else
#define USE_OSX_GETHOSTUUID 0
#endif
#if USE_OSX_GETHOSTUUID
#include <uuid/uuid.h>
#endif
using namespace llvm;
/// Try to read the lock file with the given name and identify its owner.
///
/// The file is expected to contain a host identifier and a decimal process ID
/// separated by a space. If the file cannot be read, cannot be parsed, or the
/// recorded owner is reported as no longer executing, the lock file is removed.
///
/// \param LockFileName The name of the lock file to read.
///
/// \returns The (host ID, process ID) pair of the process that owns this lock
/// file, or None if there is no valid, live owner.
Optional<std::pair<std::string, int> >
LockFileManager::readLockFile(StringRef LockFileName) {
  // Keep the buffer alive for the whole function, exactly as long as the
  // parsed StringRefs below may refer into it.
  auto BufferOrErr = MemoryBuffer::getFile(LockFileName);

  if (BufferOrErr) {
    // Split "<host> <pid>" at the first space.
    std::pair<StringRef, StringRef> Split =
        getToken((*BufferOrErr)->getBuffer(), " ");
    StringRef HostPart = Split.first;
    StringRef PIDPart = Split.second;

    // Drop any additional separator spaces in front of the PID.
    PIDPart = PIDPart.substr(PIDPart.find_first_not_of(" "));

    int OwnerPID;
    // getAsInteger returns true on failure.
    const bool ParseFailed = PIDPart.getAsInteger(10, OwnerPID);
    if (!ParseFailed) {
      std::pair<std::string, int> Candidate(std::string(HostPart), OwnerPID);
      if (processStillExecuting(Candidate.first, Candidate.second))
        return Candidate;
    }
  }

  // Either the file was unreadable, malformed, or its owner is gone: the lock
  // file is invalid, so delete it.
  sys::fs::remove(LockFileName);
  return None;
}
/// Fill \p HostID with an identifier for the current host.
///
/// Any previous contents of \p HostID are discarded. Depending on the
/// platform the identifier is the hardware UUID (OS X), the result of
/// gethostname (other Unix systems), or the fixed string "localhost".
///
/// \returns A default-constructed (success) error code, or the errno-based
/// error if gethostuuid fails on OS X.
static std::error_code getHostID(SmallVectorImpl<char> &HostID) {
  HostID.clear();

#if USE_OSX_GETHOSTUUID
  // On OS X, use the more stable hardware UUID instead of hostname.
  struct timespec Timeout = {1, 0}; // 1 second.
  uuid_t HostUUID;
  if (gethostuuid(HostUUID, &Timeout) != 0)
    return std::error_code(errno, std::system_category());

  uuid_string_t UUIDText;
  uuid_unparse(HostUUID, UUIDText);
  const StringRef ID(UUIDText);
#elif LLVM_ON_UNIX
  // Pre-terminate both ends of the buffer so it is a valid (possibly empty)
  // C string regardless of what gethostname writes; its result is not checked.
  char NameBuf[256];
  NameBuf[0] = 0;
  NameBuf[255] = 0;
  gethostname(NameBuf, 255);
  const StringRef ID(NameBuf);
#else
  const StringRef ID("localhost");
#endif

  HostID.append(ID.begin(), ID.end());
  return std::error_code();
}
/// Determine whether the process recorded as a lock owner may still be alive.
///
/// The answer is conservative: false is returned only on Unix (excluding
/// Android) when \p HostID matches this host's ID and getsid reports ESRCH
/// for \p PID. In every other situation, including failure to obtain this
/// host's ID, the process is assumed to be executing.
bool LockFileManager::processStillExecuting(StringRef HostID, int PID) {
#if LLVM_ON_UNIX && !defined(__ANDROID__)
  SmallString<256> LocalHostID;
  const std::error_code HostIDError = getHostID(LocalHostID);
  if (HostIDError)
    return true; // Conservatively assume it's executing on error.

  // Only probe the PID when the lock was taken on this host; the checks are
  // short-circuited so getsid is not called for a foreign host.
  const bool SameHost = LocalHostID == HostID;
  if (SameHost && getsid(PID) == -1 && errno == ESRCH)
    return false; // The process is dead.
#endif

  return true;
}
LTO: Keep file handles open for memory mapped files. On Windows we've observed that if you open a file, write to it, map it into memory and close the file handle, the contents of the memory mapping can sometimes be incorrect. That was what we did when adding an entry to the ThinLTO cache using the TempFile and MemoryBuffer classes, and it was causing intermittent build failures on Chromium's ThinLTO bots on Windows. More details are in the associated Chromium bug (crbug.com/786127). We can prevent this from happening by keeping a handle to the file open while the mapping is active. So this patch changes the mapped_file_region class to duplicate the file handle when mapping the file and close it upon unmapping it. One gotcha is that the file handle that we keep open must not have been created with FILE_FLAG_DELETE_ON_CLOSE, as otherwise the operating system will prevent other processes from opening the file. We can achieve this by avoiding the use of FILE_FLAG_DELETE_ON_CLOSE altogether. Instead, we use SetFileInformationByHandle with FileDispositionInfo to manage the delete-on-close bit. This lets us remove the hack that we used to use to clear the delete-on-close bit on a file opened with FILE_FLAG_DELETE_ON_CLOSE. A downside of using SetFileInformationByHandle/FileDispositionInfo as opposed to FILE_FLAG_DELETE_ON_CLOSE is that it prevents us from using CreateFile to open the file while the flag is set, even within the same process. This doesn't seem to matter for almost every client of TempFile, except for LockFileManager, which calls sys::fs::create_link to create a hard link from the lock file, and in the process of doing so tries to open the file. To prevent this change from breaking LockFileManager I changed it to stop using TempFile by effectively reverting r318550. Differential Revision: https://reviews.llvm.org/D48051 llvm-svn: 334630
2018-06-13 20:03:14 +02:00
namespace {

/// An RAII helper object that ensures the unique lock file is removed.
///
/// Ensures that if there is an error or a signal before we finish acquiring the
/// lock, the unique file will be removed. And if we successfully take the lock,
/// the signal handler is left in place so that signals while the lock is held
/// will remove the unique lock file. The caller should ensure there is a
/// matching call to sys::DontRemoveFileOnSignal when the lock is released.
///
/// The object is non-copyable: its destructor removes the file and
/// unregisters the signal handler, so a copy would repeat that cleanup.
class RemoveUniqueLockFileOnSignal {
  /// Name of the unique lock file. This is a non-owning reference; the
  /// storage it points to must outlive this object.
  StringRef Filename;

  /// True until lockAcquired() is called; while true, the destructor deletes
  /// the file and unregisters the signal handler.
  bool RemoveImmediately = true;

public:
  /// Register \p Name to be removed if a signal arrives.
  explicit RemoveUniqueLockFileOnSignal(StringRef Name) : Filename(Name) {
    sys::RemoveFileOnSignal(Filename, nullptr);
  }

  RemoveUniqueLockFileOnSignal(const RemoveUniqueLockFileOnSignal &) = delete;
  RemoveUniqueLockFileOnSignal &
  operator=(const RemoveUniqueLockFileOnSignal &) = delete;

  ~RemoveUniqueLockFileOnSignal() {
    if (!RemoveImmediately) {
      // Leave the signal handler enabled. It will be removed when the lock is
      // released.
      return;
    }
    sys::fs::remove(Filename);
    sys::DontRemoveFileOnSignal(Filename);
  }

  /// Record that the lock was taken, so destruction leaves the file and the
  /// signal handler in place.
  void lockAcquired() { RemoveImmediately = false; }
};

} // end anonymous namespace
LockFileManager::LockFileManager(StringRef FileName)
{
this->FileName = FileName;
if (std::error_code EC = sys::fs::make_absolute(this->FileName)) {
std::string S("failed to obtain absolute path for ");
S.append(std::string(this->FileName.str()));
setError(EC, S);
return;
}
LockFileName = this->FileName;
LockFileName += ".lock";
// If the lock file already exists, don't bother to try to create our own
// lock file; it won't work anyway. Just figure out who owns this lock file.
if ((Owner = readLockFile(LockFileName)))
return;
// Create a lock file that is unique to this instance.
LTO: Keep file handles open for memory mapped files. On Windows we've observed that if you open a file, write to it, map it into memory and close the file handle, the contents of the memory mapping can sometimes be incorrect. That was what we did when adding an entry to the ThinLTO cache using the TempFile and MemoryBuffer classes, and it was causing intermittent build failures on Chromium's ThinLTO bots on Windows. More details are in the associated Chromium bug (crbug.com/786127). We can prevent this from happening by keeping a handle to the file open while the mapping is active. So this patch changes the mapped_file_region class to duplicate the file handle when mapping the file and close it upon unmapping it. One gotcha is that the file handle that we keep open must not have been created with FILE_FLAG_DELETE_ON_CLOSE, as otherwise the operating system will prevent other processes from opening the file. We can achieve this by avoiding the use of FILE_FLAG_DELETE_ON_CLOSE altogether. Instead, we use SetFileInformationByHandle with FileDispositionInfo to manage the delete-on-close bit. This lets us remove the hack that we used to use to clear the delete-on-close bit on a file opened with FILE_FLAG_DELETE_ON_CLOSE. A downside of using SetFileInformationByHandle/FileDispositionInfo as opposed to FILE_FLAG_DELETE_ON_CLOSE is that it prevents us from using CreateFile to open the file while the flag is set, even within the same process. This doesn't seem to matter for almost every client of TempFile, except for LockFileManager, which calls sys::fs::create_link to create a hard link from the lock file, and in the process of doing so tries to open the file. To prevent this change from breaking LockFileManager I changed it to stop using TempFile by effectively reverting r318550. Differential Revision: https://reviews.llvm.org/D48051 llvm-svn: 334630
2018-06-13 20:03:14 +02:00
UniqueLockFileName = LockFileName;
UniqueLockFileName += "-%%%%%%%%";
int UniqueLockFileID;
if (std::error_code EC = sys::fs::createUniqueFile(
UniqueLockFileName, UniqueLockFileID, UniqueLockFileName)) {
std::string S("failed to create unique file ");
S.append(std::string(UniqueLockFileName.str()));
setError(EC, S);
return;
}
// Write our process ID to our unique lock file.
{
SmallString<256> HostID;
if (auto EC = getHostID(HostID)) {
setError(EC, "failed to get host id");
return;
}
LTO: Keep file handles open for memory mapped files. On Windows we've observed that if you open a file, write to it, map it into memory and close the file handle, the contents of the memory mapping can sometimes be incorrect. That was what we did when adding an entry to the ThinLTO cache using the TempFile and MemoryBuffer classes, and it was causing intermittent build failures on Chromium's ThinLTO bots on Windows. More details are in the associated Chromium bug (crbug.com/786127). We can prevent this from happening by keeping a handle to the file open while the mapping is active. So this patch changes the mapped_file_region class to duplicate the file handle when mapping the file and close it upon unmapping it. One gotcha is that the file handle that we keep open must not have been created with FILE_FLAG_DELETE_ON_CLOSE, as otherwise the operating system will prevent other processes from opening the file. We can achieve this by avoiding the use of FILE_FLAG_DELETE_ON_CLOSE altogether. Instead, we use SetFileInformationByHandle with FileDispositionInfo to manage the delete-on-close bit. This lets us remove the hack that we used to use to clear the delete-on-close bit on a file opened with FILE_FLAG_DELETE_ON_CLOSE. A downside of using SetFileInformationByHandle/FileDispositionInfo as opposed to FILE_FLAG_DELETE_ON_CLOSE is that it prevents us from using CreateFile to open the file while the flag is set, even within the same process. This doesn't seem to matter for almost every client of TempFile, except for LockFileManager, which calls sys::fs::create_link to create a hard link from the lock file, and in the process of doing so tries to open the file. To prevent this change from breaking LockFileManager I changed it to stop using TempFile by effectively reverting r318550. Differential Revision: https://reviews.llvm.org/D48051 llvm-svn: 334630
2018-06-13 20:03:14 +02:00
raw_fd_ostream Out(UniqueLockFileID, /*shouldClose=*/true);
Out << HostID << ' ' << sys::Process::getProcessId();
LTO: Keep file handles open for memory mapped files. On Windows we've observed that if you open a file, write to it, map it into memory and close the file handle, the contents of the memory mapping can sometimes be incorrect. That was what we did when adding an entry to the ThinLTO cache using the TempFile and MemoryBuffer classes, and it was causing intermittent build failures on Chromium's ThinLTO bots on Windows. More details are in the associated Chromium bug (crbug.com/786127). We can prevent this from happening by keeping a handle to the file open while the mapping is active. So this patch changes the mapped_file_region class to duplicate the file handle when mapping the file and close it upon unmapping it. One gotcha is that the file handle that we keep open must not have been created with FILE_FLAG_DELETE_ON_CLOSE, as otherwise the operating system will prevent other processes from opening the file. We can achieve this by avoiding the use of FILE_FLAG_DELETE_ON_CLOSE altogether. Instead, we use SetFileInformationByHandle with FileDispositionInfo to manage the delete-on-close bit. This lets us remove the hack that we used to use to clear the delete-on-close bit on a file opened with FILE_FLAG_DELETE_ON_CLOSE. A downside of using SetFileInformationByHandle/FileDispositionInfo as opposed to FILE_FLAG_DELETE_ON_CLOSE is that it prevents us from using CreateFile to open the file while the flag is set, even within the same process. This doesn't seem to matter for almost every client of TempFile, except for LockFileManager, which calls sys::fs::create_link to create a hard link from the lock file, and in the process of doing so tries to open the file. To prevent this change from breaking LockFileManager I changed it to stop using TempFile by effectively reverting r318550. Differential Revision: https://reviews.llvm.org/D48051 llvm-svn: 334630
2018-06-13 20:03:14 +02:00
Out.close();
if (Out.has_error()) {
// We failed to write out PID, so report the error, remove the
// unique lock file, and fail.
std::string S("failed to write to ");
S.append(std::string(UniqueLockFileName.str()));
setError(Out.error(), S);
LTO: Keep file handles open for memory mapped files. On Windows we've observed that if you open a file, write to it, map it into memory and close the file handle, the contents of the memory mapping can sometimes be incorrect. That was what we did when adding an entry to the ThinLTO cache using the TempFile and MemoryBuffer classes, and it was causing intermittent build failures on Chromium's ThinLTO bots on Windows. More details are in the associated Chromium bug (crbug.com/786127). We can prevent this from happening by keeping a handle to the file open while the mapping is active. So this patch changes the mapped_file_region class to duplicate the file handle when mapping the file and close it upon unmapping it. One gotcha is that the file handle that we keep open must not have been created with FILE_FLAG_DELETE_ON_CLOSE, as otherwise the operating system will prevent other processes from opening the file. We can achieve this by avoiding the use of FILE_FLAG_DELETE_ON_CLOSE altogether. Instead, we use SetFileInformationByHandle with FileDispositionInfo to manage the delete-on-close bit. This lets us remove the hack that we used to use to clear the delete-on-close bit on a file opened with FILE_FLAG_DELETE_ON_CLOSE. A downside of using SetFileInformationByHandle/FileDispositionInfo as opposed to FILE_FLAG_DELETE_ON_CLOSE is that it prevents us from using CreateFile to open the file while the flag is set, even within the same process. This doesn't seem to matter for almost every client of TempFile, except for LockFileManager, which calls sys::fs::create_link to create a hard link from the lock file, and in the process of doing so tries to open the file. To prevent this change from breaking LockFileManager I changed it to stop using TempFile by effectively reverting r318550. Differential Revision: https://reviews.llvm.org/D48051 llvm-svn: 334630
2018-06-13 20:03:14 +02:00
sys::fs::remove(UniqueLockFileName);
return;
}
}
LTO: Keep file handles open for memory mapped files. On Windows we've observed that if you open a file, write to it, map it into memory and close the file handle, the contents of the memory mapping can sometimes be incorrect. That was what we did when adding an entry to the ThinLTO cache using the TempFile and MemoryBuffer classes, and it was causing intermittent build failures on Chromium's ThinLTO bots on Windows. More details are in the associated Chromium bug (crbug.com/786127). We can prevent this from happening by keeping a handle to the file open while the mapping is active. So this patch changes the mapped_file_region class to duplicate the file handle when mapping the file and close it upon unmapping it. One gotcha is that the file handle that we keep open must not have been created with FILE_FLAG_DELETE_ON_CLOSE, as otherwise the operating system will prevent other processes from opening the file. We can achieve this by avoiding the use of FILE_FLAG_DELETE_ON_CLOSE altogether. Instead, we use SetFileInformationByHandle with FileDispositionInfo to manage the delete-on-close bit. This lets us remove the hack that we used to use to clear the delete-on-close bit on a file opened with FILE_FLAG_DELETE_ON_CLOSE. A downside of using SetFileInformationByHandle/FileDispositionInfo as opposed to FILE_FLAG_DELETE_ON_CLOSE is that it prevents us from using CreateFile to open the file while the flag is set, even within the same process. This doesn't seem to matter for almost every client of TempFile, except for LockFileManager, which calls sys::fs::create_link to create a hard link from the lock file, and in the process of doing so tries to open the file. To prevent this change from breaking LockFileManager I changed it to stop using TempFile by effectively reverting r318550. Differential Revision: https://reviews.llvm.org/D48051 llvm-svn: 334630
2018-06-13 20:03:14 +02:00
// Clean up the unique file on signal, which also releases the lock if it is
// held since the .lock symlink will point to a nonexistent file.
RemoveUniqueLockFileOnSignal RemoveUniqueFile(UniqueLockFileName);
while (true) {
// Create a link from the lock file name. If this succeeds, we're done.
std::error_code EC =
LTO: Keep file handles open for memory mapped files. On Windows we've observed that if you open a file, write to it, map it into memory and close the file handle, the contents of the memory mapping can sometimes be incorrect. That was what we did when adding an entry to the ThinLTO cache using the TempFile and MemoryBuffer classes, and it was causing intermittent build failures on Chromium's ThinLTO bots on Windows. More details are in the associated Chromium bug (crbug.com/786127). We can prevent this from happening by keeping a handle to the file open while the mapping is active. So this patch changes the mapped_file_region class to duplicate the file handle when mapping the file and close it upon unmapping it. One gotcha is that the file handle that we keep open must not have been created with FILE_FLAG_DELETE_ON_CLOSE, as otherwise the operating system will prevent other processes from opening the file. We can achieve this by avoiding the use of FILE_FLAG_DELETE_ON_CLOSE altogether. Instead, we use SetFileInformationByHandle with FileDispositionInfo to manage the delete-on-close bit. This lets us remove the hack that we used to use to clear the delete-on-close bit on a file opened with FILE_FLAG_DELETE_ON_CLOSE. A downside of using SetFileInformationByHandle/FileDispositionInfo as opposed to FILE_FLAG_DELETE_ON_CLOSE is that it prevents us from using CreateFile to open the file while the flag is set, even within the same process. This doesn't seem to matter for almost every client of TempFile, except for LockFileManager, which calls sys::fs::create_link to create a hard link from the lock file, and in the process of doing so tries to open the file. To prevent this change from breaking LockFileManager I changed it to stop using TempFile by effectively reverting r318550. Differential Revision: https://reviews.llvm.org/D48051 llvm-svn: 334630
2018-06-13 20:03:14 +02:00
sys::fs::create_link(UniqueLockFileName, LockFileName);
if (!EC) {
LTO: Keep file handles open for memory mapped files. On Windows we've observed that if you open a file, write to it, map it into memory and close the file handle, the contents of the memory mapping can sometimes be incorrect. That was what we did when adding an entry to the ThinLTO cache using the TempFile and MemoryBuffer classes, and it was causing intermittent build failures on Chromium's ThinLTO bots on Windows. More details are in the associated Chromium bug (crbug.com/786127). We can prevent this from happening by keeping a handle to the file open while the mapping is active. So this patch changes the mapped_file_region class to duplicate the file handle when mapping the file and close it upon unmapping it. One gotcha is that the file handle that we keep open must not have been created with FILE_FLAG_DELETE_ON_CLOSE, as otherwise the operating system will prevent other processes from opening the file. We can achieve this by avoiding the use of FILE_FLAG_DELETE_ON_CLOSE altogether. Instead, we use SetFileInformationByHandle with FileDispositionInfo to manage the delete-on-close bit. This lets us remove the hack that we used to use to clear the delete-on-close bit on a file opened with FILE_FLAG_DELETE_ON_CLOSE. A downside of using SetFileInformationByHandle/FileDispositionInfo as opposed to FILE_FLAG_DELETE_ON_CLOSE is that it prevents us from using CreateFile to open the file while the flag is set, even within the same process. This doesn't seem to matter for almost every client of TempFile, except for LockFileManager, which calls sys::fs::create_link to create a hard link from the lock file, and in the process of doing so tries to open the file. To prevent this change from breaking LockFileManager I changed it to stop using TempFile by effectively reverting r318550. Differential Revision: https://reviews.llvm.org/D48051 llvm-svn: 334630
2018-06-13 20:03:14 +02:00
RemoveUniqueFile.lockAcquired();
return;
}
if (EC != errc::file_exists) {
std::string S("failed to create link ");
raw_string_ostream OSS(S);
LTO: Keep file handles open for memory mapped files. On Windows we've observed that if you open a file, write to it, map it into memory and close the file handle, the contents of the memory mapping can sometimes be incorrect. That was what we did when adding an entry to the ThinLTO cache using the TempFile and MemoryBuffer classes, and it was causing intermittent build failures on Chromium's ThinLTO bots on Windows. More details are in the associated Chromium bug (crbug.com/786127). We can prevent this from happening by keeping a handle to the file open while the mapping is active. So this patch changes the mapped_file_region class to duplicate the file handle when mapping the file and close it upon unmapping it. One gotcha is that the file handle that we keep open must not have been created with FILE_FLAG_DELETE_ON_CLOSE, as otherwise the operating system will prevent other processes from opening the file. We can achieve this by avoiding the use of FILE_FLAG_DELETE_ON_CLOSE altogether. Instead, we use SetFileInformationByHandle with FileDispositionInfo to manage the delete-on-close bit. This lets us remove the hack that we used to use to clear the delete-on-close bit on a file opened with FILE_FLAG_DELETE_ON_CLOSE. A downside of using SetFileInformationByHandle/FileDispositionInfo as opposed to FILE_FLAG_DELETE_ON_CLOSE is that it prevents us from using CreateFile to open the file while the flag is set, even within the same process. This doesn't seem to matter for almost every client of TempFile, except for LockFileManager, which calls sys::fs::create_link to create a hard link from the lock file, and in the process of doing so tries to open the file. To prevent this change from breaking LockFileManager I changed it to stop using TempFile by effectively reverting r318550. Differential Revision: https://reviews.llvm.org/D48051 llvm-svn: 334630
2018-06-13 20:03:14 +02:00
OSS << LockFileName.str() << " to " << UniqueLockFileName.str();
setError(EC, OSS.str());
return;
}
// Someone else managed to create the lock file first. Read the process ID
// from the lock file.
LTO: Keep file handles open for memory mapped files. On Windows we've observed that if you open a file, write to it, map it into memory and close the file handle, the contents of the memory mapping can sometimes be incorrect. That was what we did when adding an entry to the ThinLTO cache using the TempFile and MemoryBuffer classes, and it was causing intermittent build failures on Chromium's ThinLTO bots on Windows. More details are in the associated Chromium bug (crbug.com/786127). We can prevent this from happening by keeping a handle to the file open while the mapping is active. So this patch changes the mapped_file_region class to duplicate the file handle when mapping the file and close it upon unmapping it. One gotcha is that the file handle that we keep open must not have been created with FILE_FLAG_DELETE_ON_CLOSE, as otherwise the operating system will prevent other processes from opening the file. We can achieve this by avoiding the use of FILE_FLAG_DELETE_ON_CLOSE altogether. Instead, we use SetFileInformationByHandle with FileDispositionInfo to manage the delete-on-close bit. This lets us remove the hack that we used to use to clear the delete-on-close bit on a file opened with FILE_FLAG_DELETE_ON_CLOSE. A downside of using SetFileInformationByHandle/FileDispositionInfo as opposed to FILE_FLAG_DELETE_ON_CLOSE is that it prevents us from using CreateFile to open the file while the flag is set, even within the same process. This doesn't seem to matter for almost every client of TempFile, except for LockFileManager, which calls sys::fs::create_link to create a hard link from the lock file, and in the process of doing so tries to open the file. To prevent this change from breaking LockFileManager I changed it to stop using TempFile by effectively reverting r318550. Differential Revision: https://reviews.llvm.org/D48051 llvm-svn: 334630
2018-06-13 20:03:14 +02:00
if ((Owner = readLockFile(LockFileName))) {
// Wipe out our unique lock file (it's useless now)
sys::fs::remove(UniqueLockFileName);
return;
}
if (!sys::fs::exists(LockFileName)) {
// The previous owner released the lock file before we could read it.
// Try to get ownership again.
continue;
}
// There is a lock file that nobody owns; try to clean it up and get
// ownership.
if ((EC = sys::fs::remove(LockFileName))) {
std::string S("failed to remove lockfile ");
S.append(std::string(UniqueLockFileName.str()));
setError(EC, S);
return;
}
}
}
// Classify the current state of this lock manager.
//
// Returns LFS_Shared when an owner has been recorded in Owner, LFS_Error when
// no owner is recorded but ErrorCode is set, and LFS_Owned otherwise.
LockFileManager::LockFileState LockFileManager::getState() const {
  // A recorded owner takes precedence over any recorded error.
  if (!Owner)
    return ErrorCode ? LFS_Error : LFS_Owned;
  return LFS_Shared;
}
// Produce a description of the recorded error.
//
// The result is ErrorDiagMsg, followed by ": " and the error code's message
// when that message is non-empty. When no error code is set, the result is an
// empty string.
std::string LockFileManager::getErrorMessage() const {
  if (!ErrorCode)
    return "";

  std::string Msg(ErrorDiagMsg);
  const std::string CodeText = ErrorCode.message();
  if (!CodeText.empty()) {
    Msg += ": ";
    Msg += CodeText;
  }
  return Msg;
}
LockFileManager::~LockFileManager() {
if (getState() != LFS_Owned)
return;
// Since we own the lock, remove the lock file and our own unique lock file.
sys::fs::remove(LockFileName);
LTO: Keep file handles open for memory mapped files. On Windows we've observed that if you open a file, write to it, map it into memory and close the file handle, the contents of the memory mapping can sometimes be incorrect. That was what we did when adding an entry to the ThinLTO cache using the TempFile and MemoryBuffer classes, and it was causing intermittent build failures on Chromium's ThinLTO bots on Windows. More details are in the associated Chromium bug (crbug.com/786127). We can prevent this from happening by keeping a handle to the file open while the mapping is active. So this patch changes the mapped_file_region class to duplicate the file handle when mapping the file and close it upon unmapping it. One gotcha is that the file handle that we keep open must not have been created with FILE_FLAG_DELETE_ON_CLOSE, as otherwise the operating system will prevent other processes from opening the file. We can achieve this by avoiding the use of FILE_FLAG_DELETE_ON_CLOSE altogether. Instead, we use SetFileInformationByHandle with FileDispositionInfo to manage the delete-on-close bit. This lets us remove the hack that we used to use to clear the delete-on-close bit on a file opened with FILE_FLAG_DELETE_ON_CLOSE. A downside of using SetFileInformationByHandle/FileDispositionInfo as opposed to FILE_FLAG_DELETE_ON_CLOSE is that it prevents us from using CreateFile to open the file while the flag is set, even within the same process. This doesn't seem to matter for almost every client of TempFile, except for LockFileManager, which calls sys::fs::create_link to create a hard link from the lock file, and in the process of doing so tries to open the file. To prevent this change from breaking LockFileManager I changed it to stop using TempFile by effectively reverting r318550. Differential Revision: https://reviews.llvm.org/D48051 llvm-svn: 334630
2018-06-13 20:03:14 +02:00
sys::fs::remove(UniqueLockFileName);
// The unique file is now gone, so remove it from the signal handler. This
// matches a sys::RemoveFileOnSignal() in LockFileManager().
sys::DontRemoveFileOnSignal(UniqueLockFileName);
}
LockFileManager::WaitForUnlockResult
LockFileManager::waitForUnlock(const unsigned MaxSeconds) {
if (getState() != LFS_Shared)
return Res_Success;
Improve module.pcm lock file performance on machines with high core counts Summary: When building a large Xcode project with multiple module dependencies, and mixed Objective-C & Swift, I observed a large number of clang processes stalling at zero CPU for 30+ seconds throughout the build. This was especially prevalent on my 18-core iMac Pro. After some sampling, the major cause appears to be the lock file implementation for precompiled modules in the module cache. When the lock is heavily contended by multiple clang processes, the exponential backoff runs in lockstep, with some of the processes sleeping for 30+ seconds in order to acquire the file lock. In the attached patch, I implemented a more aggressive polling mechanism that limits the sleep interval to a max of 500ms, and randomizes the wait time. I preserved a limited form of exponential backoff. I also updated the code to use cross-platform timing, thread sleep, and random number capabilities available in C++11. On iMac Pro (2.3 GHz Intel Xeon W, 18 core): Xcode 11.1 bundled clang: 502.2 seconds (average of 5 runs) Custom clang build with LockFileManager patch applied: 276.6 seconds (average of 5 runs) This is a 1.82x speedup for this use case. On MacBook Pro (4 core 3.1GHz Intel i7): Xcode 11.1 bundled clang: 539.4 seconds (average of 2 runs) Custom clang build with LockFileManager patch applied: 509.5 seconds (average of 2 runs) As expected, machines with fewer cores benefit less from this change. 
``` Call graph: 2992 Thread_393602 DispatchQueue_1: com.apple.main-thread (serial) 2992 start (in libdyld.dylib) + 1 [0x7fff6a1683d5] 2992 main (in clang) + 297 [0x1097a1059] 2992 driver_main(int, char const**) (in clang) + 2803 [0x1097a5513] 2992 cc1_main(llvm::ArrayRef<char const*>, char const*, void*) (in clang) + 1608 [0x1097a7cc8] 2992 clang::ExecuteCompilerInvocation(clang::CompilerInstance*) (in clang) + 3299 [0x1097dace3] 2992 clang::CompilerInstance::ExecuteAction(clang::FrontendAction&) (in clang) + 509 [0x1097dcc1d] 2992 clang::FrontendAction::Execute() (in clang) + 42 [0x109818b3a] 2992 clang::ParseAST(clang::Sema&, bool, bool) (in clang) + 185 [0x10981b369] 2992 clang::Parser::ParseFirstTopLevelDecl(clang::OpaquePtr<clang::DeclGroupRef>&) (in clang) + 37 [0x10983e9b5] 2992 clang::Parser::ParseTopLevelDecl(clang::OpaquePtr<clang::DeclGroupRef>&) (in clang) + 141 [0x10983ecfd] 2992 clang::Parser::ParseExternalDeclaration(clang::Parser::ParsedAttributesWithRange&, clang::ParsingDeclSpec*) (in clang) + 695 [0x10983f3b7] 2992 clang::Parser::ParseObjCAtDirectives(clang::Parser::ParsedAttributesWithRange&) (in clang) + 637 [0x10a9be9bd] 2992 clang::Parser::ParseModuleImport(clang::SourceLocation) (in clang) + 170 [0x10c4841ba] 2992 clang::Parser::ParseModuleName(clang::SourceLocation, llvm::SmallVectorImpl<std::__1::pair<clang::IdentifierInfo*, clang::SourceLocation> >&, bool) (in clang) + 503 [0x10c485267] 2992 clang::Preprocessor::Lex(clang::Token&) (in clang) + 316 [0x1098285cc] 2992 clang::Preprocessor::LexAfterModuleImport(clang::Token&) (in clang) + 690 [0x10cc7af62] 2992 clang::CompilerInstance::loadModule(clang::SourceLocation, llvm::ArrayRef<std::__1::pair<clang::IdentifierInfo*, clang::SourceLocation> >, clang::Module::NameVisibilityKind, bool) (in clang) + 7989 [0x10bba6535] 2992 compileAndLoadModule(clang::CompilerInstance&, clang::SourceLocation, clang::SourceLocation, clang::Module*, llvm::StringRef) (in clang) + 296 [0x10bba8318] 2992 
llvm::LockFileManager::waitForUnlock() (in clang) + 91 [0x10b6953ab] 2992 nanosleep (in libsystem_c.dylib) + 199 [0x7fff6a22c914] 2992 __semwait_signal (in libsystem_kernel.dylib) + 10 [0x7fff6a2a0f32] ``` Differential Revision: https://reviews.llvm.org/D69575
2020-03-23 22:16:55 +01:00
// Since we don't yet have an event-based method to wait for the lock file,
// implement randomized exponential backoff, similar to Ethernet collision
// algorithm. This improves performance on machines with high core counts
// when the file lock is heavily contended by multiple clang processes
const unsigned long MinWaitDurationMS = 10;
const unsigned long MaxWaitMultiplier = 50; // 500ms max wait
unsigned long WaitMultiplier = 1;
unsigned long ElapsedTimeSeconds = 0;
std::random_device Device;
std::default_random_engine Engine(Device());
auto StartTime = std::chrono::steady_clock::now();
do {
Improve module.pcm lock file performance on machines with high core counts Summary: When building a large Xcode project with multiple module dependencies, and mixed Objective-C & Swift, I observed a large number of clang processes stalling at zero CPU for 30+ seconds throughout the build. This was especially prevalent on my 18-core iMac Pro. After some sampling, the major cause appears to be the lock file implementation for precompiled modules in the module cache. When the lock is heavily contended by multiple clang processes, the exponential backoff runs in lockstep, with some of the processes sleeping for 30+ seconds in order to acquire the file lock. In the attached patch, I implemented a more aggressive polling mechanism that limits the sleep interval to a max of 500ms, and randomizes the wait time. I preserved a limited form of exponential backoff. I also updated the code to use cross-platform timing, thread sleep, and random number capabilities available in C++11. On iMac Pro (2.3 GHz Intel Xeon W, 18 core): Xcode 11.1 bundled clang: 502.2 seconds (average of 5 runs) Custom clang build with LockFileManager patch applied: 276.6 seconds (average of 5 runs) This is a 1.82x speedup for this use case. On MacBook Pro (4 core 3.1GHz Intel i7): Xcode 11.1 bundled clang: 539.4 seconds (average of 2 runs) Custom clang build with LockFileManager patch applied: 509.5 seconds (average of 2 runs) As expected, machines with fewer cores benefit less from this change. 
``` Call graph: 2992 Thread_393602 DispatchQueue_1: com.apple.main-thread (serial) 2992 start (in libdyld.dylib) + 1 [0x7fff6a1683d5] 2992 main (in clang) + 297 [0x1097a1059] 2992 driver_main(int, char const**) (in clang) + 2803 [0x1097a5513] 2992 cc1_main(llvm::ArrayRef<char const*>, char const*, void*) (in clang) + 1608 [0x1097a7cc8] 2992 clang::ExecuteCompilerInvocation(clang::CompilerInstance*) (in clang) + 3299 [0x1097dace3] 2992 clang::CompilerInstance::ExecuteAction(clang::FrontendAction&) (in clang) + 509 [0x1097dcc1d] 2992 clang::FrontendAction::Execute() (in clang) + 42 [0x109818b3a] 2992 clang::ParseAST(clang::Sema&, bool, bool) (in clang) + 185 [0x10981b369] 2992 clang::Parser::ParseFirstTopLevelDecl(clang::OpaquePtr<clang::DeclGroupRef>&) (in clang) + 37 [0x10983e9b5] 2992 clang::Parser::ParseTopLevelDecl(clang::OpaquePtr<clang::DeclGroupRef>&) (in clang) + 141 [0x10983ecfd] 2992 clang::Parser::ParseExternalDeclaration(clang::Parser::ParsedAttributesWithRange&, clang::ParsingDeclSpec*) (in clang) + 695 [0x10983f3b7] 2992 clang::Parser::ParseObjCAtDirectives(clang::Parser::ParsedAttributesWithRange&) (in clang) + 637 [0x10a9be9bd] 2992 clang::Parser::ParseModuleImport(clang::SourceLocation) (in clang) + 170 [0x10c4841ba] 2992 clang::Parser::ParseModuleName(clang::SourceLocation, llvm::SmallVectorImpl<std::__1::pair<clang::IdentifierInfo*, clang::SourceLocation> >&, bool) (in clang) + 503 [0x10c485267] 2992 clang::Preprocessor::Lex(clang::Token&) (in clang) + 316 [0x1098285cc] 2992 clang::Preprocessor::LexAfterModuleImport(clang::Token&) (in clang) + 690 [0x10cc7af62] 2992 clang::CompilerInstance::loadModule(clang::SourceLocation, llvm::ArrayRef<std::__1::pair<clang::IdentifierInfo*, clang::SourceLocation> >, clang::Module::NameVisibilityKind, bool) (in clang) + 7989 [0x10bba6535] 2992 compileAndLoadModule(clang::CompilerInstance&, clang::SourceLocation, clang::SourceLocation, clang::Module*, llvm::StringRef) (in clang) + 296 [0x10bba8318] 2992 
llvm::LockFileManager::waitForUnlock() (in clang) + 91 [0x10b6953ab] 2992 nanosleep (in libsystem_c.dylib) + 199 [0x7fff6a22c914] 2992 __semwait_signal (in libsystem_kernel.dylib) + 10 [0x7fff6a2a0f32] ``` Differential Revision: https://reviews.llvm.org/D69575
2020-03-23 22:16:55 +01:00
// FIXME: implement event-based waiting
// Sleep for the designated interval, to allow the owning process time to
// finish up and remove the lock file.
Improve module.pcm lock file performance on machines with high core counts Summary: When building a large Xcode project with multiple module dependencies, and mixed Objective-C & Swift, I observed a large number of clang processes stalling at zero CPU for 30+ seconds throughout the build. This was especially prevalent on my 18-core iMac Pro. After some sampling, the major cause appears to be the lock file implementation for precompiled modules in the module cache. When the lock is heavily contended by multiple clang processes, the exponential backoff runs in lockstep, with some of the processes sleeping for 30+ seconds in order to acquire the file lock. In the attached patch, I implemented a more aggressive polling mechanism that limits the sleep interval to a max of 500ms, and randomizes the wait time. I preserved a limited form of exponential backoff. I also updated the code to use cross-platform timing, thread sleep, and random number capabilities available in C++11. On iMac Pro (2.3 GHz Intel Xeon W, 18 core): Xcode 11.1 bundled clang: 502.2 seconds (average of 5 runs) Custom clang build with LockFileManager patch applied: 276.6 seconds (average of 5 runs) This is a 1.82x speedup for this use case. On MacBook Pro (4 core 3.1GHz Intel i7): Xcode 11.1 bundled clang: 539.4 seconds (average of 2 runs) Custom clang build with LockFileManager patch applied: 509.5 seconds (average of 2 runs) As expected, machines with fewer cores benefit less from this change. 
``` Call graph: 2992 Thread_393602 DispatchQueue_1: com.apple.main-thread (serial) 2992 start (in libdyld.dylib) + 1 [0x7fff6a1683d5] 2992 main (in clang) + 297 [0x1097a1059] 2992 driver_main(int, char const**) (in clang) + 2803 [0x1097a5513] 2992 cc1_main(llvm::ArrayRef<char const*>, char const*, void*) (in clang) + 1608 [0x1097a7cc8] 2992 clang::ExecuteCompilerInvocation(clang::CompilerInstance*) (in clang) + 3299 [0x1097dace3] 2992 clang::CompilerInstance::ExecuteAction(clang::FrontendAction&) (in clang) + 509 [0x1097dcc1d] 2992 clang::FrontendAction::Execute() (in clang) + 42 [0x109818b3a] 2992 clang::ParseAST(clang::Sema&, bool, bool) (in clang) + 185 [0x10981b369] 2992 clang::Parser::ParseFirstTopLevelDecl(clang::OpaquePtr<clang::DeclGroupRef>&) (in clang) + 37 [0x10983e9b5] 2992 clang::Parser::ParseTopLevelDecl(clang::OpaquePtr<clang::DeclGroupRef>&) (in clang) + 141 [0x10983ecfd] 2992 clang::Parser::ParseExternalDeclaration(clang::Parser::ParsedAttributesWithRange&, clang::ParsingDeclSpec*) (in clang) + 695 [0x10983f3b7] 2992 clang::Parser::ParseObjCAtDirectives(clang::Parser::ParsedAttributesWithRange&) (in clang) + 637 [0x10a9be9bd] 2992 clang::Parser::ParseModuleImport(clang::SourceLocation) (in clang) + 170 [0x10c4841ba] 2992 clang::Parser::ParseModuleName(clang::SourceLocation, llvm::SmallVectorImpl<std::__1::pair<clang::IdentifierInfo*, clang::SourceLocation> >&, bool) (in clang) + 503 [0x10c485267] 2992 clang::Preprocessor::Lex(clang::Token&) (in clang) + 316 [0x1098285cc] 2992 clang::Preprocessor::LexAfterModuleImport(clang::Token&) (in clang) + 690 [0x10cc7af62] 2992 clang::CompilerInstance::loadModule(clang::SourceLocation, llvm::ArrayRef<std::__1::pair<clang::IdentifierInfo*, clang::SourceLocation> >, clang::Module::NameVisibilityKind, bool) (in clang) + 7989 [0x10bba6535] 2992 compileAndLoadModule(clang::CompilerInstance&, clang::SourceLocation, clang::SourceLocation, clang::Module*, llvm::StringRef) (in clang) + 296 [0x10bba8318] 2992 
llvm::LockFileManager::waitForUnlock() (in clang) + 91 [0x10b6953ab] 2992 nanosleep (in libsystem_c.dylib) + 199 [0x7fff6a22c914] 2992 __semwait_signal (in libsystem_kernel.dylib) + 10 [0x7fff6a2a0f32] ``` Differential Revision: https://reviews.llvm.org/D69575
2020-03-23 22:16:55 +01:00
std::uniform_int_distribution<unsigned long> Distribution(1,
WaitMultiplier);
unsigned long WaitDurationMS = MinWaitDurationMS * Distribution(Engine);
std::this_thread::sleep_for(std::chrono::milliseconds(WaitDurationMS));
if (sys::fs::access(LockFileName.c_str(), sys::fs::AccessMode::Exist) ==
errc::no_such_file_or_directory) {
// If the original file wasn't created, somone thought the lock was dead.
if (!sys::fs::exists(FileName))
return Res_OwnerDied;
return Res_Success;
}
// If the process owning the lock died without cleaning up, just bail out.
if (!processStillExecuting((*Owner).first, (*Owner).second))
return Res_OwnerDied;
Improve module.pcm lock file performance on machines with high core counts Summary: When building a large Xcode project with multiple module dependencies, and mixed Objective-C & Swift, I observed a large number of clang processes stalling at zero CPU for 30+ seconds throughout the build. This was especially prevalent on my 18-core iMac Pro. After some sampling, the major cause appears to be the lock file implementation for precompiled modules in the module cache. When the lock is heavily contended by multiple clang processes, the exponential backoff runs in lockstep, with some of the processes sleeping for 30+ seconds in order to acquire the file lock. In the attached patch, I implemented a more aggressive polling mechanism that limits the sleep interval to a max of 500ms, and randomizes the wait time. I preserved a limited form of exponential backoff. I also updated the code to use cross-platform timing, thread sleep, and random number capabilities available in C++11. On iMac Pro (2.3 GHz Intel Xeon W, 18 core): Xcode 11.1 bundled clang: 502.2 seconds (average of 5 runs) Custom clang build with LockFileManager patch applied: 276.6 seconds (average of 5 runs) This is a 1.82x speedup for this use case. On MacBook Pro (4 core 3.1GHz Intel i7): Xcode 11.1 bundled clang: 539.4 seconds (average of 2 runs) Custom clang build with LockFileManager patch applied: 509.5 seconds (average of 2 runs) As expected, machines with fewer cores benefit less from this change. 
``` Call graph: 2992 Thread_393602 DispatchQueue_1: com.apple.main-thread (serial) 2992 start (in libdyld.dylib) + 1 [0x7fff6a1683d5] 2992 main (in clang) + 297 [0x1097a1059] 2992 driver_main(int, char const**) (in clang) + 2803 [0x1097a5513] 2992 cc1_main(llvm::ArrayRef<char const*>, char const*, void*) (in clang) + 1608 [0x1097a7cc8] 2992 clang::ExecuteCompilerInvocation(clang::CompilerInstance*) (in clang) + 3299 [0x1097dace3] 2992 clang::CompilerInstance::ExecuteAction(clang::FrontendAction&) (in clang) + 509 [0x1097dcc1d] 2992 clang::FrontendAction::Execute() (in clang) + 42 [0x109818b3a] 2992 clang::ParseAST(clang::Sema&, bool, bool) (in clang) + 185 [0x10981b369] 2992 clang::Parser::ParseFirstTopLevelDecl(clang::OpaquePtr<clang::DeclGroupRef>&) (in clang) + 37 [0x10983e9b5] 2992 clang::Parser::ParseTopLevelDecl(clang::OpaquePtr<clang::DeclGroupRef>&) (in clang) + 141 [0x10983ecfd] 2992 clang::Parser::ParseExternalDeclaration(clang::Parser::ParsedAttributesWithRange&, clang::ParsingDeclSpec*) (in clang) + 695 [0x10983f3b7] 2992 clang::Parser::ParseObjCAtDirectives(clang::Parser::ParsedAttributesWithRange&) (in clang) + 637 [0x10a9be9bd] 2992 clang::Parser::ParseModuleImport(clang::SourceLocation) (in clang) + 170 [0x10c4841ba] 2992 clang::Parser::ParseModuleName(clang::SourceLocation, llvm::SmallVectorImpl<std::__1::pair<clang::IdentifierInfo*, clang::SourceLocation> >&, bool) (in clang) + 503 [0x10c485267] 2992 clang::Preprocessor::Lex(clang::Token&) (in clang) + 316 [0x1098285cc] 2992 clang::Preprocessor::LexAfterModuleImport(clang::Token&) (in clang) + 690 [0x10cc7af62] 2992 clang::CompilerInstance::loadModule(clang::SourceLocation, llvm::ArrayRef<std::__1::pair<clang::IdentifierInfo*, clang::SourceLocation> >, clang::Module::NameVisibilityKind, bool) (in clang) + 7989 [0x10bba6535] 2992 compileAndLoadModule(clang::CompilerInstance&, clang::SourceLocation, clang::SourceLocation, clang::Module*, llvm::StringRef) (in clang) + 296 [0x10bba8318] 2992 
llvm::LockFileManager::waitForUnlock() (in clang) + 91 [0x10b6953ab] 2992 nanosleep (in libsystem_c.dylib) + 199 [0x7fff6a22c914] 2992 __semwait_signal (in libsystem_kernel.dylib) + 10 [0x7fff6a2a0f32] ``` Differential Revision: https://reviews.llvm.org/D69575
2020-03-23 22:16:55 +01:00
WaitMultiplier *= 2;
if (WaitMultiplier > MaxWaitMultiplier) {
WaitMultiplier = MaxWaitMultiplier;
}
Improve module.pcm lock file performance on machines with high core counts Summary: When building a large Xcode project with multiple module dependencies, and mixed Objective-C & Swift, I observed a large number of clang processes stalling at zero CPU for 30+ seconds throughout the build. This was especially prevalent on my 18-core iMac Pro. After some sampling, the major cause appears to be the lock file implementation for precompiled modules in the module cache. When the lock is heavily contended by multiple clang processes, the exponential backoff runs in lockstep, with some of the processes sleeping for 30+ seconds in order to acquire the file lock. In the attached patch, I implemented a more aggressive polling mechanism that limits the sleep interval to a max of 500ms, and randomizes the wait time. I preserved a limited form of exponential backoff. I also updated the code to use cross-platform timing, thread sleep, and random number capabilities available in C++11. On iMac Pro (2.3 GHz Intel Xeon W, 18 core): Xcode 11.1 bundled clang: 502.2 seconds (average of 5 runs) Custom clang build with LockFileManager patch applied: 276.6 seconds (average of 5 runs) This is a 1.82x speedup for this use case. On MacBook Pro (4 core 3.1GHz Intel i7): Xcode 11.1 bundled clang: 539.4 seconds (average of 2 runs) Custom clang build with LockFileManager patch applied: 509.5 seconds (average of 2 runs) As expected, machines with fewer cores benefit less from this change. 
``` Call graph: 2992 Thread_393602 DispatchQueue_1: com.apple.main-thread (serial) 2992 start (in libdyld.dylib) + 1 [0x7fff6a1683d5] 2992 main (in clang) + 297 [0x1097a1059] 2992 driver_main(int, char const**) (in clang) + 2803 [0x1097a5513] 2992 cc1_main(llvm::ArrayRef<char const*>, char const*, void*) (in clang) + 1608 [0x1097a7cc8] 2992 clang::ExecuteCompilerInvocation(clang::CompilerInstance*) (in clang) + 3299 [0x1097dace3] 2992 clang::CompilerInstance::ExecuteAction(clang::FrontendAction&) (in clang) + 509 [0x1097dcc1d] 2992 clang::FrontendAction::Execute() (in clang) + 42 [0x109818b3a] 2992 clang::ParseAST(clang::Sema&, bool, bool) (in clang) + 185 [0x10981b369] 2992 clang::Parser::ParseFirstTopLevelDecl(clang::OpaquePtr<clang::DeclGroupRef>&) (in clang) + 37 [0x10983e9b5] 2992 clang::Parser::ParseTopLevelDecl(clang::OpaquePtr<clang::DeclGroupRef>&) (in clang) + 141 [0x10983ecfd] 2992 clang::Parser::ParseExternalDeclaration(clang::Parser::ParsedAttributesWithRange&, clang::ParsingDeclSpec*) (in clang) + 695 [0x10983f3b7] 2992 clang::Parser::ParseObjCAtDirectives(clang::Parser::ParsedAttributesWithRange&) (in clang) + 637 [0x10a9be9bd] 2992 clang::Parser::ParseModuleImport(clang::SourceLocation) (in clang) + 170 [0x10c4841ba] 2992 clang::Parser::ParseModuleName(clang::SourceLocation, llvm::SmallVectorImpl<std::__1::pair<clang::IdentifierInfo*, clang::SourceLocation> >&, bool) (in clang) + 503 [0x10c485267] 2992 clang::Preprocessor::Lex(clang::Token&) (in clang) + 316 [0x1098285cc] 2992 clang::Preprocessor::LexAfterModuleImport(clang::Token&) (in clang) + 690 [0x10cc7af62] 2992 clang::CompilerInstance::loadModule(clang::SourceLocation, llvm::ArrayRef<std::__1::pair<clang::IdentifierInfo*, clang::SourceLocation> >, clang::Module::NameVisibilityKind, bool) (in clang) + 7989 [0x10bba6535] 2992 compileAndLoadModule(clang::CompilerInstance&, clang::SourceLocation, clang::SourceLocation, clang::Module*, llvm::StringRef) (in clang) + 296 [0x10bba8318] 2992 
llvm::LockFileManager::waitForUnlock() (in clang) + 91 [0x10b6953ab] 2992 nanosleep (in libsystem_c.dylib) + 199 [0x7fff6a22c914] 2992 __semwait_signal (in libsystem_kernel.dylib) + 10 [0x7fff6a2a0f32] ``` Differential Revision: https://reviews.llvm.org/D69575
2020-03-23 22:16:55 +01:00
ElapsedTimeSeconds = std::chrono::duration_cast<std::chrono::seconds>(
std::chrono::steady_clock::now() - StartTime)
.count();
} while (ElapsedTimeSeconds < MaxSeconds);
// Give up.
return Res_Timeout;
}
// Remove the lock file named by LockFileName and return the error code
// reported by sys::fs::remove. No state or ownership check (getState()) is
// performed before the removal.
std::error_code LockFileManager::unsafeRemoveLockFile() {
return sys::fs::remove(LockFileName);
}