From 129f5a276825542b04c5686dc8eee6d766d65ab3 Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Tue, 3 Oct 2017 16:25:15 +0000 Subject: [PATCH] Use sched_getaffinity instead of std::thread::hardware_concurrency. The issue with std::thread::hardware_concurrency is that it forwards to libc and some implementations (like glibc) don't take thread affinity into consideration. With this change an LLVM program that can execute on only 2 cores will use 2 threads, even if the machine has 32 cores. This makes benchmarking a lot easier, but should also help if someone doesn't want to use all cores for compilation, for example. llvm-svn: 314809 --- cmake/config-ix.cmake | 1 + include/llvm/Config/config.h.cmake | 3 +++ include/llvm/Support/ThreadPool.h | 4 ++-- include/llvm/Support/Threading.h | 8 ++++++++ lib/Fuzzer/FuzzerUtil.cpp | 10 +--------- lib/Support/Parallel.cpp | 4 ++-- lib/Support/ThreadPool.cpp | 5 +++-- lib/Support/Threading.cpp | 14 ++++++++++++++ tools/llvm-profdata/llvm-profdata.cpp | 4 ++-- 9 files changed, 36 insertions(+), 17 deletions(-) diff --git a/cmake/config-ix.cmake b/cmake/config-ix.cmake index a1a16b99eb1..c89e46227c5 100644 --- a/cmake/config-ix.cmake +++ b/cmake/config-ix.cmake @@ -269,6 +269,7 @@ if( LLVM_USING_GLIBC ) add_definitions( -D_GNU_SOURCE ) endif() # This check requires _GNU_SOURCE +check_library_exists(c sched_getaffinity "" HAVE_SCHED_GETAFFINITY) if(HAVE_LIBPTHREAD) check_library_exists(pthread pthread_getname_np "" HAVE_PTHREAD_GETNAME_NP) check_library_exists(pthread pthread_setname_np "" HAVE_PTHREAD_SETNAME_NP) diff --git a/include/llvm/Config/config.h.cmake b/include/llvm/Config/config.h.cmake index d0d1e0985cc..f3506de015b 100644 --- a/include/llvm/Config/config.h.cmake +++ b/include/llvm/Config/config.h.cmake @@ -185,6 +185,9 @@ /* Define to 1 if you have the `setenv' function. */ #cmakedefine HAVE_SETENV ${HAVE_SETENV} +/* Define to 1 if you have the `sched_getaffinity' function. 
*/ +#cmakedefine HAVE_SCHED_GETAFFINITY ${HAVE_SCHED_GETAFFINITY} + /* Define to 1 if you have the `setrlimit' function. */ #cmakedefine HAVE_SETRLIMIT ${HAVE_SETRLIMIT} diff --git a/include/llvm/Support/ThreadPool.h b/include/llvm/Support/ThreadPool.h index 9ada946c6da..fb825590051 100644 --- a/include/llvm/Support/ThreadPool.h +++ b/include/llvm/Support/ThreadPool.h @@ -38,8 +38,8 @@ public: using TaskTy = std::function; using PackagedTaskTy = std::packaged_task; - /// Construct a pool with the number of core available on the system (or - /// whatever the value returned by std::thread::hardware_concurrency() is). + /// Construct a pool with the number of threads found by + /// hardware_concurrency(). ThreadPool(); /// Construct a pool of \p ThreadCount threads diff --git a/include/llvm/Support/Threading.h b/include/llvm/Support/Threading.h index 03963a24c10..6d813bccb93 100644 --- a/include/llvm/Support/Threading.h +++ b/include/llvm/Support/Threading.h @@ -131,6 +131,14 @@ void llvm_execute_on_thread(void (*UserFn)(void *), void *UserData, /// Returns 1 when LLVM is configured with LLVM_ENABLE_THREADS=OFF unsigned heavyweight_hardware_concurrency(); + /// Get the number of threads that the current program can execute + /// concurrently. On some systems std::thread::hardware_concurrency() returns + /// the total number of cores, without taking affinity into consideration. + /// Returns 1 when LLVM is configured with LLVM_ENABLE_THREADS=OFF. + /// Falls back to std::thread::hardware_concurrency() if sched_getaffinity is + /// not available. + unsigned hardware_concurrency(); + /// \brief Return the current thread id, as used in various OS system calls. 
/// Note that not all platforms guarantee that the value returned will be /// unique across the entire system, so portable code should not assume diff --git a/lib/Fuzzer/FuzzerUtil.cpp b/lib/Fuzzer/FuzzerUtil.cpp index f5a77737449..5f76ddc3678 100644 --- a/lib/Fuzzer/FuzzerUtil.cpp +++ b/lib/Fuzzer/FuzzerUtil.cpp @@ -195,15 +195,7 @@ void PrintPC(const char *SymbolizedFMT, const char *FallbackFMT, uintptr_t PC) { Printf(FallbackFMT, PC); } -unsigned NumberOfCpuCores() { - unsigned N = std::thread::hardware_concurrency(); - if (!N) { - Printf("WARNING: std::thread::hardware_concurrency not well defined for " - "your platform. Assuming CPU count of 1.\n"); - N = 1; - } - return N; -} +unsigned NumberOfCpuCores() { return hardware_concurrency(); } size_t SimpleFastHash(const uint8_t *Data, size_t Size) { size_t Res = 0; diff --git a/lib/Support/Parallel.cpp b/lib/Support/Parallel.cpp index ab2cfdebf07..010e42916f9 100644 --- a/lib/Support/Parallel.cpp +++ b/lib/Support/Parallel.cpp @@ -9,6 +9,7 @@ #include "llvm/Support/Parallel.h" #include "llvm/Config/llvm-config.h" +#include "llvm/Support/Threading.h" #include #include @@ -70,8 +71,7 @@ Executor *Executor::getDefaultExecutor() { /// in filo order. class ThreadPoolExecutor : public Executor { public: - explicit ThreadPoolExecutor( - unsigned ThreadCount = std::thread::hardware_concurrency()) + explicit ThreadPoolExecutor(unsigned ThreadCount = hardware_concurrency()) : Done(ThreadCount) { // Spawn all but one of the threads in another thread as spawning threads // can take a while. 
diff --git a/lib/Support/ThreadPool.cpp b/lib/Support/ThreadPool.cpp index 22b7550d497..f1b5bdf40c3 100644 --- a/lib/Support/ThreadPool.cpp +++ b/lib/Support/ThreadPool.cpp @@ -14,14 +14,15 @@ #include "llvm/Support/ThreadPool.h" #include "llvm/Config/llvm-config.h" +#include "llvm/Support/Threading.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; #if LLVM_ENABLE_THREADS -// Default to std::thread::hardware_concurrency -ThreadPool::ThreadPool() : ThreadPool(std::thread::hardware_concurrency()) {} +// Default to hardware_concurrency +ThreadPool::ThreadPool() : ThreadPool(hardware_concurrency()) {} ThreadPool::ThreadPool(unsigned ThreadCount) : ActiveThreads(0), EnableFlag(true) { diff --git a/lib/Support/Threading.cpp b/lib/Support/Threading.cpp index 6a10b988d46..b3579b57548 100644 --- a/lib/Support/Threading.cpp +++ b/lib/Support/Threading.cpp @@ -47,6 +47,8 @@ void llvm::llvm_execute_on_thread(void (*Fn)(void *), void *UserData, unsigned llvm::heavyweight_hardware_concurrency() { return 1; } +unsigned llvm::hardware_concurrency() { return 1; } + uint64_t llvm::get_threadid() { return 0; } uint32_t llvm::get_max_thread_name_length() { return 0; } @@ -71,6 +73,18 @@ unsigned llvm::heavyweight_hardware_concurrency() { return NumPhysical; } +unsigned llvm::hardware_concurrency() { +#ifdef HAVE_SCHED_GETAFFINITY + cpu_set_t Set; + if (sched_getaffinity(0, sizeof(Set), &Set) == 0) // 0 means success. + return CPU_COUNT(&Set); +#endif + // Guard against std::thread::hardware_concurrency() returning 0. + if (unsigned Val = std::thread::hardware_concurrency()) + return Val; + return 1; +} + // Include the platform-specific parts of this class. 
#ifdef LLVM_ON_UNIX #include "Unix/Threading.inc" diff --git a/tools/llvm-profdata/llvm-profdata.cpp b/tools/llvm-profdata/llvm-profdata.cpp index eee242107da..8e21a7a9b4f 100644 --- a/tools/llvm-profdata/llvm-profdata.cpp +++ b/tools/llvm-profdata/llvm-profdata.cpp @@ -211,8 +211,8 @@ static void mergeInstrProfile(const WeightedFileVector &Inputs, // If NumThreads is not specified, auto-detect a good default. if (NumThreads == 0) - NumThreads = std::max(1U, std::min(std::thread::hardware_concurrency(), - unsigned(Inputs.size() / 2))); + NumThreads = + std::min(hardware_concurrency(), unsigned((Inputs.size() + 1) / 2)); // Initialize the writer contexts. SmallVector, 4> Contexts;