mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
Re-land [Support] On Windows, take the affinity mask into account
The number of hardware threads available to a ThreadPool can be limited if setting an affinity mask. For example: > start /B /AFFINITY 0xF lld-link.exe ... Would let LLD only use 4 hyper-threads. Previously, there was an outstanding issue on Windows Server 2019 on dual-CPU machines, which was preventing from using both CPU sockets. In normal conditions, when no affinity mask was set, ProcessorGroup::AllThreads was different from ProcessorGroup::UsableThreads. The previous code in llvm/lib/Support/Windows/Threading.inc L201 was improperly assuming those two values to be equal, and consequently was limiting the execution to only one CPU socket. Differential Revision: https://reviews.llvm.org/D92419
This commit is contained in:
parent
4b3a70fc30
commit
11bbe8413e
@ -14,6 +14,7 @@
|
|||||||
#define LLVM_SUPPORT_PROGRAM_H
|
#define LLVM_SUPPORT_PROGRAM_H
|
||||||
|
|
||||||
#include "llvm/ADT/ArrayRef.h"
|
#include "llvm/ADT/ArrayRef.h"
|
||||||
|
#include "llvm/ADT/BitVector.h"
|
||||||
#include "llvm/ADT/Optional.h"
|
#include "llvm/ADT/Optional.h"
|
||||||
#include "llvm/ADT/StringRef.h"
|
#include "llvm/ADT/StringRef.h"
|
||||||
#include "llvm/Config/llvm-config.h"
|
#include "llvm/Config/llvm-config.h"
|
||||||
@ -125,9 +126,11 @@ namespace sys {
|
|||||||
///< string is non-empty upon return an error occurred while invoking the
|
///< string is non-empty upon return an error occurred while invoking the
|
||||||
///< program.
|
///< program.
|
||||||
bool *ExecutionFailed = nullptr,
|
bool *ExecutionFailed = nullptr,
|
||||||
Optional<ProcessStatistics> *ProcStat = nullptr ///< If non-zero, provides
|
Optional<ProcessStatistics> *ProcStat = nullptr, ///< If non-zero,
|
||||||
/// a pointer to a structure in which process execution statistics will be
|
/// provides a pointer to a structure in which process execution
|
||||||
/// stored.
|
/// statistics will be stored.
|
||||||
|
BitVector *AffinityMask = nullptr ///< CPUs or processors the new
|
||||||
|
/// program shall run on.
|
||||||
);
|
);
|
||||||
|
|
||||||
/// Similar to ExecuteAndWait, but returns immediately.
|
/// Similar to ExecuteAndWait, but returns immediately.
|
||||||
@ -140,7 +143,8 @@ namespace sys {
|
|||||||
ArrayRef<Optional<StringRef>> Redirects = {},
|
ArrayRef<Optional<StringRef>> Redirects = {},
|
||||||
unsigned MemoryLimit = 0,
|
unsigned MemoryLimit = 0,
|
||||||
std::string *ErrMsg = nullptr,
|
std::string *ErrMsg = nullptr,
|
||||||
bool *ExecutionFailed = nullptr);
|
bool *ExecutionFailed = nullptr,
|
||||||
|
BitVector *AffinityMask = nullptr);
|
||||||
|
|
||||||
/// Return true if the given arguments fit within system-specific
|
/// Return true if the given arguments fit within system-specific
|
||||||
/// argument length limits.
|
/// argument length limits.
|
||||||
|
@ -26,17 +26,20 @@ using namespace sys;
|
|||||||
static bool Execute(ProcessInfo &PI, StringRef Program,
|
static bool Execute(ProcessInfo &PI, StringRef Program,
|
||||||
ArrayRef<StringRef> Args, Optional<ArrayRef<StringRef>> Env,
|
ArrayRef<StringRef> Args, Optional<ArrayRef<StringRef>> Env,
|
||||||
ArrayRef<Optional<StringRef>> Redirects,
|
ArrayRef<Optional<StringRef>> Redirects,
|
||||||
unsigned MemoryLimit, std::string *ErrMsg);
|
unsigned MemoryLimit, std::string *ErrMsg,
|
||||||
|
BitVector *AffinityMask);
|
||||||
|
|
||||||
int sys::ExecuteAndWait(StringRef Program, ArrayRef<StringRef> Args,
|
int sys::ExecuteAndWait(StringRef Program, ArrayRef<StringRef> Args,
|
||||||
Optional<ArrayRef<StringRef>> Env,
|
Optional<ArrayRef<StringRef>> Env,
|
||||||
ArrayRef<Optional<StringRef>> Redirects,
|
ArrayRef<Optional<StringRef>> Redirects,
|
||||||
unsigned SecondsToWait, unsigned MemoryLimit,
|
unsigned SecondsToWait, unsigned MemoryLimit,
|
||||||
std::string *ErrMsg, bool *ExecutionFailed,
|
std::string *ErrMsg, bool *ExecutionFailed,
|
||||||
Optional<ProcessStatistics> *ProcStat) {
|
Optional<ProcessStatistics> *ProcStat,
|
||||||
|
BitVector *AffinityMask) {
|
||||||
assert(Redirects.empty() || Redirects.size() == 3);
|
assert(Redirects.empty() || Redirects.size() == 3);
|
||||||
ProcessInfo PI;
|
ProcessInfo PI;
|
||||||
if (Execute(PI, Program, Args, Env, Redirects, MemoryLimit, ErrMsg)) {
|
if (Execute(PI, Program, Args, Env, Redirects, MemoryLimit, ErrMsg,
|
||||||
|
AffinityMask)) {
|
||||||
if (ExecutionFailed)
|
if (ExecutionFailed)
|
||||||
*ExecutionFailed = false;
|
*ExecutionFailed = false;
|
||||||
ProcessInfo Result =
|
ProcessInfo Result =
|
||||||
@ -55,12 +58,13 @@ ProcessInfo sys::ExecuteNoWait(StringRef Program, ArrayRef<StringRef> Args,
|
|||||||
Optional<ArrayRef<StringRef>> Env,
|
Optional<ArrayRef<StringRef>> Env,
|
||||||
ArrayRef<Optional<StringRef>> Redirects,
|
ArrayRef<Optional<StringRef>> Redirects,
|
||||||
unsigned MemoryLimit, std::string *ErrMsg,
|
unsigned MemoryLimit, std::string *ErrMsg,
|
||||||
bool *ExecutionFailed) {
|
bool *ExecutionFailed, BitVector *AffinityMask) {
|
||||||
assert(Redirects.empty() || Redirects.size() == 3);
|
assert(Redirects.empty() || Redirects.size() == 3);
|
||||||
ProcessInfo PI;
|
ProcessInfo PI;
|
||||||
if (ExecutionFailed)
|
if (ExecutionFailed)
|
||||||
*ExecutionFailed = false;
|
*ExecutionFailed = false;
|
||||||
if (!Execute(PI, Program, Args, Env, Redirects, MemoryLimit, ErrMsg))
|
if (!Execute(PI, Program, Args, Env, Redirects, MemoryLimit, ErrMsg,
|
||||||
|
AffinityMask))
|
||||||
if (ExecutionFailed)
|
if (ExecutionFailed)
|
||||||
*ExecutionFailed = true;
|
*ExecutionFailed = true;
|
||||||
|
|
||||||
|
@ -174,7 +174,8 @@ toNullTerminatedCStringArray(ArrayRef<StringRef> Strings, StringSaver &Saver) {
|
|||||||
static bool Execute(ProcessInfo &PI, StringRef Program,
|
static bool Execute(ProcessInfo &PI, StringRef Program,
|
||||||
ArrayRef<StringRef> Args, Optional<ArrayRef<StringRef>> Env,
|
ArrayRef<StringRef> Args, Optional<ArrayRef<StringRef>> Env,
|
||||||
ArrayRef<Optional<StringRef>> Redirects,
|
ArrayRef<Optional<StringRef>> Redirects,
|
||||||
unsigned MemoryLimit, std::string *ErrMsg) {
|
unsigned MemoryLimit, std::string *ErrMsg,
|
||||||
|
BitVector *AffinityMask) {
|
||||||
if (!llvm::sys::fs::exists(Program)) {
|
if (!llvm::sys::fs::exists(Program)) {
|
||||||
if (ErrMsg)
|
if (ErrMsg)
|
||||||
*ErrMsg = std::string("Executable \"") + Program.str() +
|
*ErrMsg = std::string("Executable \"") + Program.str() +
|
||||||
@ -182,6 +183,9 @@ static bool Execute(ProcessInfo &PI, StringRef Program,
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
assert(!AffinityMask && "Starting a process with an affinity mask is "
|
||||||
|
"currently not supported on Unix!");
|
||||||
|
|
||||||
BumpPtrAllocator Allocator;
|
BumpPtrAllocator Allocator;
|
||||||
StringSaver Saver(Allocator);
|
StringSaver Saver(Allocator);
|
||||||
std::vector<const char *> ArgVector, EnvVector;
|
std::vector<const char *> ArgVector, EnvVector;
|
||||||
|
@ -171,7 +171,8 @@ static HANDLE RedirectIO(Optional<StringRef> Path, int fd,
|
|||||||
static bool Execute(ProcessInfo &PI, StringRef Program,
|
static bool Execute(ProcessInfo &PI, StringRef Program,
|
||||||
ArrayRef<StringRef> Args, Optional<ArrayRef<StringRef>> Env,
|
ArrayRef<StringRef> Args, Optional<ArrayRef<StringRef>> Env,
|
||||||
ArrayRef<Optional<StringRef>> Redirects,
|
ArrayRef<Optional<StringRef>> Redirects,
|
||||||
unsigned MemoryLimit, std::string *ErrMsg) {
|
unsigned MemoryLimit, std::string *ErrMsg,
|
||||||
|
BitVector *AffinityMask) {
|
||||||
if (!sys::fs::can_execute(Program)) {
|
if (!sys::fs::can_execute(Program)) {
|
||||||
if (ErrMsg)
|
if (ErrMsg)
|
||||||
*ErrMsg = "program not executable";
|
*ErrMsg = "program not executable";
|
||||||
@ -277,11 +278,15 @@ static bool Execute(ProcessInfo &PI, StringRef Program,
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
unsigned CreateFlags = CREATE_UNICODE_ENVIRONMENT;
|
||||||
|
if (AffinityMask)
|
||||||
|
CreateFlags |= CREATE_SUSPENDED;
|
||||||
|
|
||||||
std::vector<wchar_t> CommandUtf16(Command.size() + 1, 0);
|
std::vector<wchar_t> CommandUtf16(Command.size() + 1, 0);
|
||||||
std::copy(Command.begin(), Command.end(), CommandUtf16.begin());
|
std::copy(Command.begin(), Command.end(), CommandUtf16.begin());
|
||||||
BOOL rc = CreateProcessW(ProgramUtf16.data(), CommandUtf16.data(), 0, 0, TRUE,
|
BOOL rc = CreateProcessW(ProgramUtf16.data(), CommandUtf16.data(), 0, 0, TRUE,
|
||||||
CREATE_UNICODE_ENVIRONMENT,
|
CreateFlags, EnvBlock.empty() ? 0 : EnvBlock.data(),
|
||||||
EnvBlock.empty() ? 0 : EnvBlock.data(), 0, &si, &pi);
|
0, &si, &pi);
|
||||||
DWORD err = GetLastError();
|
DWORD err = GetLastError();
|
||||||
|
|
||||||
// Regardless of whether the process got created or not, we are done with
|
// Regardless of whether the process got created or not, we are done with
|
||||||
@ -329,6 +334,13 @@ static bool Execute(ProcessInfo &PI, StringRef Program,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Set the affinity mask
|
||||||
|
if (AffinityMask) {
|
||||||
|
::SetProcessAffinityMask(pi.hProcess,
|
||||||
|
(DWORD_PTR)AffinityMask->getData().front());
|
||||||
|
::ResumeThread(pi.hThread);
|
||||||
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -195,14 +195,27 @@ static ArrayRef<ProcessorGroup> getProcessorGroups() {
|
|||||||
if (!IterateProcInfo(RelationProcessorCore, HandleProc))
|
if (!IterateProcInfo(RelationProcessorCore, HandleProc))
|
||||||
return std::vector<ProcessorGroup>();
|
return std::vector<ProcessorGroup>();
|
||||||
|
|
||||||
// If there's an affinity mask set on one of the CPUs, then assume the user
|
// If there's an affinity mask set, assume the user wants to constrain the
|
||||||
// wants to constrain the current process to only a single CPU.
|
// current process to only a single CPU group. On Windows, it is not
|
||||||
for (auto &G : Groups) {
|
// possible for affinity masks to cross CPU group boundaries.
|
||||||
if (G.UsableThreads != G.AllThreads) {
|
DWORD_PTR ProcessAffinityMask = 0, SystemAffinityMask = 0;
|
||||||
ProcessorGroup NewG{G};
|
if (::GetProcessAffinityMask(GetCurrentProcess(), &ProcessAffinityMask,
|
||||||
|
&SystemAffinityMask) &&
|
||||||
|
ProcessAffinityMask != SystemAffinityMask) {
|
||||||
|
// We don't expect more that 4 CPU groups on Windows (256 processors).
|
||||||
|
USHORT GroupCount = 4;
|
||||||
|
USHORT GroupArray[4]{};
|
||||||
|
if (::GetProcessGroupAffinity(GetCurrentProcess(), &GroupCount,
|
||||||
|
GroupArray)) {
|
||||||
|
assert(GroupCount == 1 &&
|
||||||
|
"On startup, a program is expected to be assigned only to "
|
||||||
|
"one processor group!");
|
||||||
|
unsigned CurrentGroupID = GroupArray[0];
|
||||||
|
ProcessorGroup NewG{Groups[CurrentGroupID]};
|
||||||
|
NewG.Affinity = ProcessAffinityMask;
|
||||||
|
NewG.UsableThreads = countPopulation(ProcessAffinityMask);
|
||||||
Groups.clear();
|
Groups.clear();
|
||||||
Groups.push_back(NewG);
|
Groups.push_back(NewG);
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -8,11 +8,13 @@
|
|||||||
|
|
||||||
#include "llvm/Support/ThreadPool.h"
|
#include "llvm/Support/ThreadPool.h"
|
||||||
|
|
||||||
#include "llvm/ADT/DenseSet.h"
|
|
||||||
#include "llvm/ADT/STLExtras.h"
|
#include "llvm/ADT/STLExtras.h"
|
||||||
|
#include "llvm/ADT/SetVector.h"
|
||||||
#include "llvm/ADT/SmallVector.h"
|
#include "llvm/ADT/SmallVector.h"
|
||||||
#include "llvm/ADT/Triple.h"
|
#include "llvm/ADT/Triple.h"
|
||||||
|
#include "llvm/Support/CommandLine.h"
|
||||||
#include "llvm/Support/Host.h"
|
#include "llvm/Support/Host.h"
|
||||||
|
#include "llvm/Support/Program.h"
|
||||||
#include "llvm/Support/TargetSelect.h"
|
#include "llvm/Support/TargetSelect.h"
|
||||||
#include "llvm/Support/Threading.h"
|
#include "llvm/Support/Threading.h"
|
||||||
|
|
||||||
@ -45,6 +47,14 @@ protected:
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool isWindows() {
|
||||||
|
// FIXME: Skip some tests below on non-Windows because multi-socket systems
|
||||||
|
// were not fully tested on Unix yet, and llvm::get_thread_affinity_mask()
|
||||||
|
// isn't implemented for Unix.
|
||||||
|
Triple Host(Triple::normalize(sys::getProcessTriple()));
|
||||||
|
return Host.isOSWindows();
|
||||||
|
}
|
||||||
|
|
||||||
ThreadPoolTest() {
|
ThreadPoolTest() {
|
||||||
// Add unsupported configuration here, example:
|
// Add unsupported configuration here, example:
|
||||||
// UnsupportedArchs.push_back(Triple::x86_64);
|
// UnsupportedArchs.push_back(Triple::x86_64);
|
||||||
@ -71,7 +81,7 @@ protected:
|
|||||||
|
|
||||||
void SetUp() override { MainThreadReady = false; }
|
void SetUp() override { MainThreadReady = false; }
|
||||||
|
|
||||||
void RunOnAllSockets(ThreadPoolStrategy S);
|
std::vector<llvm::BitVector> RunOnAllSockets(ThreadPoolStrategy S);
|
||||||
|
|
||||||
std::condition_variable WaitMainThread;
|
std::condition_variable WaitMainThread;
|
||||||
std::mutex WaitMainThreadMutex;
|
std::mutex WaitMainThreadMutex;
|
||||||
@ -82,7 +92,13 @@ protected:
|
|||||||
do { \
|
do { \
|
||||||
if (isUnsupportedOSOrEnvironment()) \
|
if (isUnsupportedOSOrEnvironment()) \
|
||||||
return; \
|
return; \
|
||||||
} while (0); \
|
} while (0);
|
||||||
|
|
||||||
|
#define SKIP_NON_WINDOWS() \
|
||||||
|
do { \
|
||||||
|
if (!isWindows()) \
|
||||||
|
return; \
|
||||||
|
} while (0);
|
||||||
|
|
||||||
TEST_F(ThreadPoolTest, AsyncBarrier) {
|
TEST_F(ThreadPoolTest, AsyncBarrier) {
|
||||||
CHECK_UNSUPPORTED();
|
CHECK_UNSUPPORTED();
|
||||||
@ -169,15 +185,9 @@ TEST_F(ThreadPoolTest, PoolDestruction) {
|
|||||||
|
|
||||||
#if LLVM_ENABLE_THREADS == 1
|
#if LLVM_ENABLE_THREADS == 1
|
||||||
|
|
||||||
void ThreadPoolTest::RunOnAllSockets(ThreadPoolStrategy S) {
|
std::vector<llvm::BitVector>
|
||||||
// FIXME: Skip these tests on non-Windows because multi-socket system were not
|
ThreadPoolTest::RunOnAllSockets(ThreadPoolStrategy S) {
|
||||||
// tested on Unix yet, and llvm::get_thread_affinity_mask() isn't implemented
|
llvm::SetVector<llvm::BitVector> ThreadsUsed;
|
||||||
// for Unix.
|
|
||||||
Triple Host(Triple::normalize(sys::getProcessTriple()));
|
|
||||||
if (!Host.isOSWindows())
|
|
||||||
return;
|
|
||||||
|
|
||||||
llvm::DenseSet<llvm::BitVector> ThreadsUsed;
|
|
||||||
std::mutex Lock;
|
std::mutex Lock;
|
||||||
{
|
{
|
||||||
std::condition_variable AllThreads;
|
std::condition_variable AllThreads;
|
||||||
@ -198,7 +208,7 @@ void ThreadPoolTest::RunOnAllSockets(ThreadPoolStrategy S) {
|
|||||||
ThreadsUsed.insert(Mask);
|
ThreadsUsed.insert(Mask);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
ASSERT_EQ(true, ThreadsUsed.empty());
|
EXPECT_EQ(true, ThreadsUsed.empty());
|
||||||
{
|
{
|
||||||
std::unique_lock<std::mutex> Guard(AllThreadsLock);
|
std::unique_lock<std::mutex> Guard(AllThreadsLock);
|
||||||
AllThreads.wait(Guard,
|
AllThreads.wait(Guard,
|
||||||
@ -206,17 +216,68 @@ void ThreadPoolTest::RunOnAllSockets(ThreadPoolStrategy S) {
|
|||||||
}
|
}
|
||||||
setMainThreadReady();
|
setMainThreadReady();
|
||||||
}
|
}
|
||||||
ASSERT_EQ(llvm::get_cpus(), ThreadsUsed.size());
|
return ThreadsUsed.takeVector();
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(ThreadPoolTest, AllThreads_UseAllRessources) {
|
TEST_F(ThreadPoolTest, AllThreads_UseAllRessources) {
|
||||||
CHECK_UNSUPPORTED();
|
CHECK_UNSUPPORTED();
|
||||||
RunOnAllSockets({});
|
SKIP_NON_WINDOWS();
|
||||||
|
std::vector<llvm::BitVector> ThreadsUsed = RunOnAllSockets({});
|
||||||
|
ASSERT_EQ(llvm::get_cpus(), ThreadsUsed.size());
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(ThreadPoolTest, AllThreads_OneThreadPerCore) {
|
TEST_F(ThreadPoolTest, AllThreads_OneThreadPerCore) {
|
||||||
CHECK_UNSUPPORTED();
|
CHECK_UNSUPPORTED();
|
||||||
|
SKIP_NON_WINDOWS();
|
||||||
|
std::vector<llvm::BitVector> ThreadsUsed =
|
||||||
RunOnAllSockets(llvm::heavyweight_hardware_concurrency());
|
RunOnAllSockets(llvm::heavyweight_hardware_concurrency());
|
||||||
|
ASSERT_EQ(llvm::get_cpus(), ThreadsUsed.size());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// From TestMain.cpp.
|
||||||
|
extern const char *TestMainArgv0;
|
||||||
|
|
||||||
|
// Just a reachable symbol to ease resolving of the executable's path.
|
||||||
|
static cl::opt<std::string> ThreadPoolTestStringArg1("thread-pool-string-arg1");
|
||||||
|
|
||||||
|
#ifdef _MSC_VER
|
||||||
|
#define setenv(name, var, ignore) _putenv_s(name, var)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
TEST_F(ThreadPoolTest, AffinityMask) {
|
||||||
|
CHECK_UNSUPPORTED();
|
||||||
|
|
||||||
|
// FIXME: implement AffinityMask in Support/Unix/Program.inc
|
||||||
|
SKIP_NON_WINDOWS();
|
||||||
|
|
||||||
|
// Skip this test if less than 4 threads are available.
|
||||||
|
if (llvm::hardware_concurrency().compute_thread_count() < 4)
|
||||||
|
return;
|
||||||
|
|
||||||
|
using namespace llvm::sys;
|
||||||
|
if (getenv("LLVM_THREADPOOL_AFFINITYMASK")) {
|
||||||
|
std::vector<llvm::BitVector> ThreadsUsed = RunOnAllSockets({});
|
||||||
|
// Ensure the threads only ran on CPUs 0-3.
|
||||||
|
for (auto &It : ThreadsUsed)
|
||||||
|
ASSERT_LT(It.getData().front(), 16UL);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
std::string Executable =
|
||||||
|
sys::fs::getMainExecutable(TestMainArgv0, &ThreadPoolTestStringArg1);
|
||||||
|
StringRef argv[] = {Executable, "--gtest_filter=ThreadPoolTest.AffinityMask"};
|
||||||
|
|
||||||
|
// Add environment variable to the environment of the child process.
|
||||||
|
int Res = setenv("LLVM_THREADPOOL_AFFINITYMASK", "1", false);
|
||||||
|
ASSERT_EQ(Res, 0);
|
||||||
|
|
||||||
|
std::string Error;
|
||||||
|
bool ExecutionFailed;
|
||||||
|
BitVector Affinity;
|
||||||
|
Affinity.resize(4);
|
||||||
|
Affinity.set(0, 4); // Use CPUs 0,1,2,3.
|
||||||
|
int Ret = sys::ExecuteAndWait(Executable, argv, {}, {}, 0, 0, &Error,
|
||||||
|
&ExecutionFailed, nullptr, &Affinity);
|
||||||
|
ASSERT_EQ(0, Ret);
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif // #if LLVM_ENABLE_THREADS == 1
|
||||||
|
Loading…
x
Reference in New Issue
Block a user