mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 03:02:36 +01:00
0f55045526
This patch pulls google/benchmark v1.4.1 into the LLVM tree so that any project could use it for benchmark generation. A dummy benchmark is added to `llvm/benchmarks/DummyYAML.cpp` to validate the correctness of the build process. The current version does not utilize LLVM LNT and LLVM CMake infrastructure, but that might be sufficient for most users. Two introduced CMake variables: * `LLVM_INCLUDE_BENCHMARKS` (`ON` by default) generates benchmark targets * `LLVM_BUILD_BENCHMARKS` (`OFF` by default) adds generated benchmark targets to the list of default LLVM targets (i.e. if `ON` benchmarks will be built upon standard build invocation, e.g. `ninja` or `make` with no specific targets) List of modifications: * `BENCHMARK_ENABLE_TESTING` is disabled * `BENCHMARK_ENABLE_EXCEPTIONS` is disabled * `BENCHMARK_ENABLE_INSTALL` is disabled * `BENCHMARK_ENABLE_GTEST_TESTS` is disabled * `BENCHMARK_DOWNLOAD_DEPENDENCIES` is disabled Original discussion can be found here: http://lists.llvm.org/pipermail/llvm-dev/2018-August/125023.html Reviewed by: dberris, lebedev.ri Subscribers: ilya-biryukov, ioeric, EricWF, lebedev.ri, srhines, dschuff, mgorny, krytarowski, fedor.sergeev, mgrang, jfb, llvm-commits Differential Revision: https://reviews.llvm.org/D50894 llvm-svn: 340809
246 lines
7.1 KiB
C++
246 lines
7.1 KiB
C++
#include "benchmark/benchmark.h"
|
|
|
|
#include <assert.h>
|
|
#include <math.h>
|
|
#include <stdint.h>
|
|
|
|
#include <chrono>
|
|
#include <cstdlib>
|
|
#include <iostream>
|
|
#include <limits>
|
|
#include <list>
|
|
#include <map>
|
|
#include <mutex>
|
|
#include <set>
|
|
#include <sstream>
|
|
#include <string>
|
|
#include <thread>
|
|
#include <utility>
|
|
#include <vector>
|
|
|
|
// Marks a function as never-inlined on GCC-compatible compilers; expands to
// nothing on every other compiler.
#if defined(__GNUC__)
#define BENCHMARK_NOINLINE __attribute__((noinline))
#else
#define BENCHMARK_NOINLINE
#endif

namespace {

// Returns n! computed recursively.
//
// Both 0 and 1 terminate the recursion with a result of 1. (Testing only
// `n == 1` would let an argument of 0 wrap around the unsigned range and
// recurse without a reachable base case.)
//
// The product is formed in unsigned arithmetic and then narrowed to int, so
// results for n >= 13 do not fit and are not meaningful.
int BENCHMARK_NOINLINE Factorial(uint32_t n) {
  return (n <= 1) ? 1 : n * Factorial(n - 1);
}

// Approximates pi from `depth` terms of an alternating series.
//
// The i == 0 term evaluates to (-1) / (-1) == 1, which is not part of the
// series proper; it is removed again by the `- 1.0` before scaling by 4.
// As a consequence depth <= 0 yields -4.0 and depth == 1 yields 0.0.
double CalculatePi(int depth) {
  double pi = 0.0;
  for (int i = 0; i < depth; ++i) {
    // -1 for even i, +1 for odd i.
    const double numerator = static_cast<double>(((i % 2) * 2) - 1);
    // -1, 1, 3, 5, ...
    const double denominator = static_cast<double>((2 * i) - 1);
    pi += numerator / denominator;
  }
  return (pi - 1.0) * 4;
}

// Builds a set holding the values 0, 1, ..., size - 1 (despite the name the
// contents are sequential, not random). A non-positive `size` gives an empty
// set.
std::set<int64_t> ConstructRandomSet(int64_t size) {
  std::set<int64_t> s;
  // The index has the same type as `size` so the comparison cannot overflow a
  // narrower counter. Values arrive in ascending order, so end() is passed as
  // the insertion hint.
  for (int64_t i = 0; i < size; ++i) s.insert(s.end(), i);
  return s;
}

// Locked by BM_SetupTeardown around every access to *test_vector made inside
// its benchmark loop.
std::mutex test_vector_mu;
// Shared scratch vector; allocated and freed by thread 0 of the benchmarks
// that use it.
std::vector<int>* test_vector = nullptr;

}  // end namespace
|
|
|
|
static void BM_Factorial(benchmark::State& state) {
|
|
int fac_42 = 0;
|
|
for (auto _ : state) fac_42 = Factorial(8);
|
|
// Prevent compiler optimizations
|
|
std::stringstream ss;
|
|
ss << fac_42;
|
|
state.SetLabel(ss.str());
|
|
}
|
|
BENCHMARK(BM_Factorial);
|
|
BENCHMARK(BM_Factorial)->UseRealTime();
|
|
|
|
static void BM_CalculatePiRange(benchmark::State& state) {
|
|
double pi = 0.0;
|
|
for (auto _ : state) pi = CalculatePi(static_cast<int>(state.range(0)));
|
|
std::stringstream ss;
|
|
ss << pi;
|
|
state.SetLabel(ss.str());
|
|
}
|
|
BENCHMARK_RANGE(BM_CalculatePiRange, 1, 1024 * 1024);
|
|
|
|
static void BM_CalculatePi(benchmark::State& state) {
|
|
static const int depth = 1024;
|
|
for (auto _ : state) {
|
|
benchmark::DoNotOptimize(CalculatePi(static_cast<int>(depth)));
|
|
}
|
|
}
|
|
BENCHMARK(BM_CalculatePi)->Threads(8);
|
|
BENCHMARK(BM_CalculatePi)->ThreadRange(1, 32);
|
|
BENCHMARK(BM_CalculatePi)->ThreadPerCpu();
|
|
|
|
static void BM_SetInsert(benchmark::State& state) {
|
|
std::set<int64_t> data;
|
|
for (auto _ : state) {
|
|
state.PauseTiming();
|
|
data = ConstructRandomSet(state.range(0));
|
|
state.ResumeTiming();
|
|
for (int j = 0; j < state.range(1); ++j) data.insert(rand());
|
|
}
|
|
state.SetItemsProcessed(state.iterations() * state.range(1));
|
|
state.SetBytesProcessed(state.iterations() * state.range(1) * sizeof(int));
|
|
}
|
|
|
|
// Test many inserts at once to reduce the total iterations needed. Otherwise, the slower,
|
|
// non-timed part of each iteration will make the benchmark take forever.
|
|
BENCHMARK(BM_SetInsert)->Ranges({{1 << 10, 8 << 10}, {128, 512}});
|
|
|
|
template <typename Container,
|
|
typename ValueType = typename Container::value_type>
|
|
static void BM_Sequential(benchmark::State& state) {
|
|
ValueType v = 42;
|
|
for (auto _ : state) {
|
|
Container c;
|
|
for (int64_t i = state.range(0); --i;) c.push_back(v);
|
|
}
|
|
const int64_t items_processed = state.iterations() * state.range(0);
|
|
state.SetItemsProcessed(items_processed);
|
|
state.SetBytesProcessed(items_processed * sizeof(v));
|
|
}
|
|
BENCHMARK_TEMPLATE2(BM_Sequential, std::vector<int>, int)
|
|
->Range(1 << 0, 1 << 10);
|
|
BENCHMARK_TEMPLATE(BM_Sequential, std::list<int>)->Range(1 << 0, 1 << 10);
|
|
// Test the variadic version of BENCHMARK_TEMPLATE in C++11 and beyond.
|
|
#ifdef BENCHMARK_HAS_CXX11
|
|
BENCHMARK_TEMPLATE(BM_Sequential, std::vector<int>, int)->Arg(512);
|
|
#endif
|
|
|
|
static void BM_StringCompare(benchmark::State& state) {
|
|
size_t len = static_cast<size_t>(state.range(0));
|
|
std::string s1(len, '-');
|
|
std::string s2(len, '-');
|
|
for (auto _ : state) benchmark::DoNotOptimize(s1.compare(s2));
|
|
}
|
|
BENCHMARK(BM_StringCompare)->Range(1, 1 << 20);
|
|
|
|
static void BM_SetupTeardown(benchmark::State& state) {
|
|
if (state.thread_index == 0) {
|
|
// No need to lock test_vector_mu here as this is running single-threaded.
|
|
test_vector = new std::vector<int>();
|
|
}
|
|
int i = 0;
|
|
for (auto _ : state) {
|
|
std::lock_guard<std::mutex> l(test_vector_mu);
|
|
if (i % 2 == 0)
|
|
test_vector->push_back(i);
|
|
else
|
|
test_vector->pop_back();
|
|
++i;
|
|
}
|
|
if (state.thread_index == 0) {
|
|
delete test_vector;
|
|
}
|
|
}
|
|
BENCHMARK(BM_SetupTeardown)->ThreadPerCpu();
|
|
|
|
static void BM_LongTest(benchmark::State& state) {
|
|
double tracker = 0.0;
|
|
for (auto _ : state) {
|
|
for (int i = 0; i < state.range(0); ++i)
|
|
benchmark::DoNotOptimize(tracker += i);
|
|
}
|
|
}
|
|
BENCHMARK(BM_LongTest)->Range(1 << 16, 1 << 28);
|
|
|
|
static void BM_ParallelMemset(benchmark::State& state) {
|
|
int64_t size = state.range(0) / static_cast<int64_t>(sizeof(int));
|
|
int thread_size = static_cast<int>(size) / state.threads;
|
|
int from = thread_size * state.thread_index;
|
|
int to = from + thread_size;
|
|
|
|
if (state.thread_index == 0) {
|
|
test_vector = new std::vector<int>(static_cast<size_t>(size));
|
|
}
|
|
|
|
for (auto _ : state) {
|
|
for (int i = from; i < to; i++) {
|
|
// No need to lock test_vector_mu as ranges
|
|
// do not overlap between threads.
|
|
benchmark::DoNotOptimize(test_vector->at(i) = 1);
|
|
}
|
|
}
|
|
|
|
if (state.thread_index == 0) {
|
|
delete test_vector;
|
|
}
|
|
}
|
|
BENCHMARK(BM_ParallelMemset)->Arg(10 << 20)->ThreadRange(1, 4);
|
|
|
|
static void BM_ManualTiming(benchmark::State& state) {
|
|
int64_t slept_for = 0;
|
|
int64_t microseconds = state.range(0);
|
|
std::chrono::duration<double, std::micro> sleep_duration{
|
|
static_cast<double>(microseconds)};
|
|
|
|
for (auto _ : state) {
|
|
auto start = std::chrono::high_resolution_clock::now();
|
|
// Simulate some useful workload with a sleep
|
|
std::this_thread::sleep_for(
|
|
std::chrono::duration_cast<std::chrono::nanoseconds>(sleep_duration));
|
|
auto end = std::chrono::high_resolution_clock::now();
|
|
|
|
auto elapsed =
|
|
std::chrono::duration_cast<std::chrono::duration<double>>(end - start);
|
|
|
|
state.SetIterationTime(elapsed.count());
|
|
slept_for += microseconds;
|
|
}
|
|
state.SetItemsProcessed(slept_for);
|
|
}
|
|
BENCHMARK(BM_ManualTiming)->Range(1, 1 << 14)->UseRealTime();
|
|
BENCHMARK(BM_ManualTiming)->Range(1, 1 << 14)->UseManualTime();
|
|
|
|
#ifdef BENCHMARK_HAS_CXX11
|
|
|
|
template <class... Args>
|
|
void BM_with_args(benchmark::State& state, Args&&...) {
|
|
for (auto _ : state) {
|
|
}
|
|
}
|
|
BENCHMARK_CAPTURE(BM_with_args, int_test, 42, 43, 44);
|
|
BENCHMARK_CAPTURE(BM_with_args, string_and_pair_test, std::string("abc"),
|
|
std::pair<int, double>(42, 3.8));
|
|
|
|
void BM_non_template_args(benchmark::State& state, int, double) {
|
|
while(state.KeepRunning()) {}
|
|
}
|
|
BENCHMARK_CAPTURE(BM_non_template_args, basic_test, 0, 0);
|
|
|
|
#endif // BENCHMARK_HAS_CXX11
|
|
|
|
static void BM_DenseThreadRanges(benchmark::State& st) {
|
|
switch (st.range(0)) {
|
|
case 1:
|
|
assert(st.threads == 1 || st.threads == 2 || st.threads == 3);
|
|
break;
|
|
case 2:
|
|
assert(st.threads == 1 || st.threads == 3 || st.threads == 4);
|
|
break;
|
|
case 3:
|
|
assert(st.threads == 5 || st.threads == 8 || st.threads == 11 ||
|
|
st.threads == 14);
|
|
break;
|
|
default:
|
|
assert(false && "Invalid test case number");
|
|
}
|
|
while (st.KeepRunning()) {
|
|
}
|
|
}
|
|
BENCHMARK(BM_DenseThreadRanges)->Arg(1)->DenseThreadRange(1, 3);
|
|
BENCHMARK(BM_DenseThreadRanges)->Arg(2)->DenseThreadRange(1, 4, 2);
|
|
BENCHMARK(BM_DenseThreadRanges)->Arg(3)->DenseThreadRange(5, 14, 3);
|
|
|
|
// NOTE(review): this macro comes from benchmark/benchmark.h and is not visible
// here; presumably it expands to the main() that runs every benchmark
// registered above -- confirm against the header.
BENCHMARK_MAIN();
|