mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-24 11:42:57 +01:00
61ed3dc5bf
Before this patch, it wasn't possible to extend the ThinLTO threads to all SMT/CMT threads in the system. Only one thread per core was allowed, instructed by usage of llvm::heavyweight_hardware_concurrency() in the ThinLTO code. Any number passed to the LLD flag /opt:lldltojobs=..., or any other ThinLTO-specific flag, was previously interpreted in the context of llvm::heavyweight_hardware_concurrency(), which means SMT disabled. One can now say in LLD: /opt:lldltojobs=0 -- Use one std::thread / hardware core in the system (no SMT). Default value if flag not specified. /opt:lldltojobs=N -- Limit usage to N threads, regardless of usage of heavyweight_hardware_concurrency(). /opt:lldltojobs=all -- Use all hardware threads in the system. Equivalent to /opt:lldltojobs=$(nproc) on Linux and /opt:lldltojobs=%NUMBER_OF_PROCESSORS% on Windows. When an affinity mask is set for the process, threads will be created only for the cores selected by the mask. When N > number-of-hardware-threads-in-the-system, the threads in the thread pool will be dispatched equally on all CPU sockets (tested only on Windows). When N <= number-of-hardware-threads-on-a-CPU-socket, the threads will remain on the CPU socket where the process started (only on Windows). Differential Revision: https://reviews.llvm.org/D75153
299 lines
9.7 KiB
C++
299 lines
9.7 KiB
C++
//===- Windows/Threading.inc - Win32 Threading Implementation - -*- C++ -*-===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// This file provides the Win32 specific implementation of Threading functions.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "llvm/ADT/SmallString.h"
|
|
#include "llvm/ADT/Twine.h"
|
|
|
|
#include "llvm/Support/Windows/WindowsSupport.h"
|
|
#include <process.h>
|
|
|
|
#include <bitset>
|
|
|
|
// Windows will at times define MemoryFence.
|
|
#ifdef MemoryFence
|
|
#undef MemoryFence
|
|
#endif
|
|
|
|
// Thread entry point used for synchronous launches. \p Arg is the
// SyncThreadInfo describing the callback; its UserFn is invoked with its
// UserData and the thread then exits with code 0.
static unsigned __stdcall threadFuncSync(void *Arg) {
  auto *Info = static_cast<SyncThreadInfo *>(Arg);
  Info->UserFn(Info->UserData);
  return 0;
}
|
|
|
|
// Thread entry point used for asynchronous launches. \p Arg is an
// AsyncThreadInfo whose ownership is taken here, so it is destroyed once the
// callable has been run. The thread exits with code 0.
static unsigned __stdcall threadFuncAsync(void *Arg) {
  std::unique_ptr<AsyncThreadInfo> Owner{static_cast<AsyncThreadInfo *>(Arg)};
  AsyncThreadInfo &Task = *Owner;
  Task();
  return 0;
}
|
|
|
|
static void
|
|
llvm_execute_on_thread_impl(unsigned (__stdcall *ThreadFunc)(void *), void *Arg,
|
|
llvm::Optional<unsigned> StackSizeInBytes,
|
|
JoiningPolicy JP) {
|
|
HANDLE hThread = (HANDLE)::_beginthreadex(
|
|
NULL, StackSizeInBytes.getValueOr(0), ThreadFunc, Arg, 0, NULL);
|
|
|
|
if (!hThread) {
|
|
ReportLastErrorFatal("_beginthreadex failed");
|
|
}
|
|
|
|
if (JP == JoiningPolicy::Join) {
|
|
if (::WaitForSingleObject(hThread, INFINITE) == WAIT_FAILED) {
|
|
ReportLastErrorFatal("WaitForSingleObject failed");
|
|
}
|
|
}
|
|
if (::CloseHandle(hThread) == FALSE) {
|
|
ReportLastErrorFatal("CloseHandle failed");
|
|
}
|
|
}
|
|
|
|
uint64_t llvm::get_threadid() {
|
|
return uint64_t(::GetCurrentThreadId());
|
|
}
|
|
|
|
// Always reports 0 in this Windows implementation.
uint32_t llvm::get_max_thread_name_length() {
  return 0;
}
|
|
|
|
#if defined(_MSC_VER)
// Publishes \p Name for the thread identified by \p Id by raising exception
// 0x406D1388 with a THREADNAME_INFO record as its arguments. The exception is
// swallowed by the local SEH handler, so the call has no other visible effect
// on the program.
static void SetThreadName(DWORD Id, LPCSTR Name) {
  constexpr DWORD MS_VC_EXCEPTION = 0x406D1388;

#pragma pack(push, 8)
  struct THREADNAME_INFO {
    DWORD dwType;     // Must be 0x1000.
    LPCSTR szName;    // Pointer to thread name
    DWORD dwThreadId; // Thread ID (-1 == current thread)
    DWORD dwFlags;    // Reserved. Do not use.
  };
#pragma pack(pop)

  // Field order: type tag, name, thread id, reserved flags.
  THREADNAME_INFO Record = {0x1000, Name, Id, 0};

  // The record is handed over as an array of ULONG_PTR-sized arguments.
  const DWORD ArgCount = sizeof(Record) / sizeof(ULONG_PTR);
  ULONG_PTR *Args = reinterpret_cast<ULONG_PTR *>(&Record);

  __try {
    ::RaiseException(MS_VC_EXCEPTION, 0, ArgCount, Args);
  } __except (EXCEPTION_EXECUTE_HANDLER) {
  }
}
#endif
|
|
|
|
// Names the calling thread. Only has an effect when built with MSVC
// (_MSC_VER defined); otherwise this is a no-op.
void llvm::set_thread_name(const Twine &Name) {
#if defined(_MSC_VER)
  // SetThreadName takes a C string, so render the Twine into a
  // null-terminated buffer first.
  SmallString<64> Buffer;
  const char *CName = Name.toNullTerminatedStringRef(Buffer).data();
  SetThreadName(::GetCurrentThreadId(), CName);
#endif
}
|
|
|
|
// Always yields an empty name: \p Name is cleared and nothing is written.
void llvm::get_thread_name(SmallVectorImpl<char> &Name) {
  // A thread on Windows does not carry a "name" of its own. "Setting" one
  // merely sends a one-shot message that a debugger interprets as the program
  // naming its thread. If this ever needs to return something, a TLS entry
  // recorded by set_thread_name could be read back here.
  Name.clear();
}
|
|
|
|
// Switches the calling thread into or out of background processing mode.
//
// See
// https://docs.microsoft.com/en-us/windows/desktop/api/processthreadsapi/nf-processthreadsapi-setthreadpriority
//  - THREAD_MODE_BACKGROUND_BEGIN: the system lowers the resource scheduling
//    priorities of the thread so that it can perform background work without
//    significantly affecting activity in the foreground.
//  - THREAD_MODE_BACKGROUND_END: the system restores the resource scheduling
//    priorities of the thread as they were before it entered background
//    processing mode.
//
// Returns SUCCESS when SetThreadPriority succeeds and FAILURE otherwise.
SetThreadPriorityResult llvm::set_thread_priority(ThreadPriority Priority) {
  int Mode = THREAD_MODE_BACKGROUND_END;
  if (Priority == ThreadPriority::Background)
    Mode = THREAD_MODE_BACKGROUND_BEGIN;

  if (!SetThreadPriority(GetCurrentThread(), Mode))
    return SetThreadPriorityResult::FAILURE;
  return SetThreadPriorityResult::SUCCESS;
}
|
|
|
|
// Describes one Windows processor group as seen by this process.
//
// Every member has a default so that a freshly constructed instance is safe to
// use even if some field is never filled in; in particular ThreadsPerCore
// defaults to 1 so that useableCores() never divides by an indeterminate
// value.
struct ProcessorGroup {
  unsigned ID = 0;             // Index of the group.
  unsigned AllThreads = 0;     // Hardware threads the group can hold.
  unsigned UsableThreads = 0;  // Hardware threads currently active/usable.
  unsigned ThreadsPerCore = 1; // Hardware threads per physical core.
  uint64_t Affinity = 0;       // Bitmask of the usable threads in the group.

  // Number of physical cores usable in this group, never less than one.
  unsigned useableCores() const {
    // Guard against a zero divisor in case ThreadsPerCore was set to 0.
    const unsigned PerCore = std::max(1U, ThreadsPerCore);
    return std::max(1U, UsableThreads / PerCore);
  }
};
|
|
|
|
template <typename F>
|
|
static bool IterateProcInfo(LOGICAL_PROCESSOR_RELATIONSHIP Relationship, F Fn) {
|
|
DWORD Len = 0;
|
|
BOOL R = ::GetLogicalProcessorInformationEx(Relationship, NULL, &Len);
|
|
if (R || GetLastError() != ERROR_INSUFFICIENT_BUFFER) {
|
|
return false;
|
|
}
|
|
auto *Info = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)calloc(1, Len);
|
|
R = ::GetLogicalProcessorInformationEx(Relationship, Info, &Len);
|
|
if (R) {
|
|
auto *End =
|
|
(SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)((uint8_t *)Info + Len);
|
|
for (auto *Curr = Info; Curr < End;
|
|
Curr = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)((uint8_t *)Curr +
|
|
Curr->Size)) {
|
|
if (Curr->Relationship != Relationship)
|
|
continue;
|
|
Fn(Curr);
|
|
}
|
|
}
|
|
free(Info);
|
|
return true;
|
|
}
|
|
|
|
// Returns the processor groups usable by this process.
//
// The list is computed on the first call and kept in a function-local static;
// later calls return a view of the same data. An empty list is returned when
// the processor information cannot be queried.
static ArrayRef<ProcessorGroup> getProcessorGroups() {
  auto computeGroups = []() {
    SmallVector<ProcessorGroup, 4> Groups;

    // Creates one ProcessorGroup per active group reported by the system.
    auto HandleGroup = [&](SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *ProcInfo) {
      GROUP_RELATIONSHIP &El = ProcInfo->Group;
      for (unsigned J = 0; J < El.ActiveGroupCount; ++J) {
        ProcessorGroup G;
        G.ID = Groups.size();
        G.AllThreads = El.GroupInfo[J].MaximumProcessorCount;
        G.UsableThreads = El.GroupInfo[J].ActiveProcessorCount;
        assert(G.UsableThreads <= 64);
        // Start from one thread per core; this is refined below from the
        // per-core records. Without it the field would stay indeterminate for
        // a group that receives no core record.
        G.ThreadsPerCore = 1;
        G.Affinity = El.GroupInfo[J].ActiveProcessorMask;
        Groups.push_back(G);
      }
    };

    if (!IterateProcInfo(RelationGroup, HandleGroup))
      return std::vector<ProcessorGroup>();

    // Records, per group, how many hardware threads a core exposes.
    auto HandleProc = [&](SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *ProcInfo) {
      PROCESSOR_RELATIONSHIP &El = ProcInfo->Processor;
      assert(El.GroupCount == 1);
      unsigned NumHyperThreads = 1;
      // If the flag is set, each core supports more than one hyper-thread.
      if (El.Flags & LTP_PC_SMT)
        NumHyperThreads = std::bitset<64>(El.GroupMask[0].Mask).count();
      unsigned I = El.GroupMask[0].Group;
      assert(I < Groups.size() && "Core refers to an unknown processor group!");
      // Never index past the groups collected above.
      if (I >= Groups.size())
        return;
      Groups[I].ThreadsPerCore = NumHyperThreads;
    };

    if (!IterateProcInfo(RelationProcessorCore, HandleProc))
      return std::vector<ProcessorGroup>();

    // If a group has fewer usable threads than it can hold, assume an affinity
    // mask is in effect and that the user wants to constrain the current
    // process to that single group: keep only the first such group.
    auto Constrained = std::find_if(
        Groups.begin(), Groups.end(), [](const ProcessorGroup &G) {
          return G.UsableThreads != G.AllThreads;
        });
    if (Constrained != Groups.end()) {
      // Copy before clearing; the iterator is invalidated by clear().
      ProcessorGroup Only = *Constrained;
      Groups.clear();
      Groups.push_back(Only);
    }

    return std::vector<ProcessorGroup>(Groups.begin(), Groups.end());
  };
  static auto Groups = computeGroups();
  return ArrayRef<ProcessorGroup>(Groups);
}
|
|
|
|
// Sums the values produced by applying \p P to each element of \p Range and
// returns the total as an unsigned. An empty range yields 0.
template <typename R, typename UnaryPredicate>
static unsigned aggregate(R &&Range, UnaryPredicate P) {
  unsigned Total = 0;
  auto It = std::begin(Range);
  auto End = std::end(Range);
  while (It != End) {
    Total += P(*It);
    ++It;
  }
  return Total;
}
|
|
|
|
// for sys::getHostNumPhysicalCores
|
|
int computeHostNumPhysicalCores() {
|
|
static unsigned Cores =
|
|
aggregate(getProcessorGroups(), [](const ProcessorGroup &G) {
|
|
return G.UsableThreads / G.ThreadsPerCore;
|
|
});
|
|
return Cores;
|
|
}
|
|
|
|
int computeHostNumHardwareThreads() {
|
|
static unsigned Threads =
|
|
aggregate(getProcessorGroups(),
|
|
[](const ProcessorGroup &G) { return G.UsableThreads; });
|
|
return Threads;
|
|
}
|
|
|
|
// Finds the proper CPU socket where a thread number should go. Returns 'None'
|
|
// if the thread shall remain on the actual CPU socket.
|
|
Optional<unsigned>
|
|
llvm::ThreadPoolStrategy::compute_cpu_socket(unsigned ThreadPoolNum) const {
|
|
ArrayRef<ProcessorGroup> Groups = getProcessorGroups();
|
|
// Only one CPU socket in the system or process affinity was set, no need to
|
|
// move the thread(s) to another CPU socket.
|
|
if (Groups.size() <= 1)
|
|
return None;
|
|
|
|
// We ask for less threads than there are hardware threads per CPU socket, no
|
|
// need to dispatch threads to other CPU sockets.
|
|
unsigned MaxThreadsPerSocket =
|
|
UseHyperThreads ? Groups[0].UsableThreads : Groups[0].useableCores();
|
|
if (compute_thread_count() <= MaxThreadsPerSocket)
|
|
return None;
|
|
|
|
assert(ThreadPoolNum < compute_thread_count() &&
|
|
"The thread index is not within thread strategy's range!");
|
|
|
|
// Assumes the same number of hardware threads per CPU socket.
|
|
return (ThreadPoolNum * Groups.size()) / compute_thread_count();
|
|
}
|
|
|
|
// Assign the current thread to a more appropriate CPU socket or CPU group
|
|
void llvm::ThreadPoolStrategy::apply_thread_strategy(
|
|
unsigned ThreadPoolNum) const {
|
|
Optional<unsigned> Socket = compute_cpu_socket(ThreadPoolNum);
|
|
if (!Socket)
|
|
return;
|
|
ArrayRef<ProcessorGroup> Groups = getProcessorGroups();
|
|
GROUP_AFFINITY Affinity{};
|
|
Affinity.Group = Groups[*Socket].ID;
|
|
Affinity.Mask = Groups[*Socket].Affinity;
|
|
SetThreadGroupAffinity(GetCurrentThread(), &Affinity, nullptr);
|
|
}
|
|
|
|
// Returns the affinity of the calling thread as a bit vector with one bit per
// hardware thread across all known processor groups. Bits of the thread's
// group are placed after the bits of all groups with a smaller ID.
llvm::BitVector llvm::get_thread_affinity_mask() {
  GROUP_AFFINITY Affinity{};
  GetThreadGroupAffinity(GetCurrentThread(), &Affinity);

  // Total size of the vector; computed once and cached.
  static unsigned All = [] {
    unsigned Sum = 0;
    for (const ProcessorGroup &Group : getProcessorGroups())
      Sum += Group.AllThreads;
    return Sum;
  }();

  // Index of the first bit belonging to the thread's own group.
  unsigned StartOffset = 0;
  for (const ProcessorGroup &Group : getProcessorGroups())
    if (Group.ID < Affinity.Group)
      StartOffset += Group.AllThreads;

  llvm::BitVector Result;
  Result.resize(All);

  // Walk the mask from its lowest bit upwards until no set bits remain.
  KAFFINITY Remaining = Affinity.Mask;
  for (unsigned Bit = 0; Remaining != 0; ++Bit, Remaining >>= 1) {
    if (Remaining & 1)
      Result.set(StartOffset + Bit);
  }
  return Result;
}
|
|
|
|
unsigned llvm::get_cpus() { return getProcessorGroups().size(); }
|