30#ifndef ANKERL_NANOBENCH_H_INCLUDED
31#define ANKERL_NANOBENCH_H_INCLUDED
// Library version, split into its major / minor / patch components (4.3.11).
34#define ANKERL_NANOBENCH_VERSION_MAJOR 4
35#define ANKERL_NANOBENCH_VERSION_MINOR 3
36#define ANKERL_NANOBENCH_VERSION_PATCH 11
46#include <unordered_map>
// Feature-query dispatcher: ANKERL_NANOBENCH(x) pastes x onto the private prefix and invokes
// the resulting function-like macro, e.g. ANKERL_NANOBENCH(CXX) -> ANKERL_NANOBENCH_PRIVATE_CXX().
49#define ANKERL_NANOBENCH(x) ANKERL_NANOBENCH_PRIVATE_##x()
// Value of __cplusplus for the translation unit being compiled.
51#define ANKERL_NANOBENCH_PRIVATE_CXX() __cplusplus
// __cplusplus values of the individual standard revisions, meant to be compared
// against ANKERL_NANOBENCH(CXX).
52#define ANKERL_NANOBENCH_PRIVATE_CXX98() 199711L
53#define ANKERL_NANOBENCH_PRIVATE_CXX11() 201103L
54#define ANKERL_NANOBENCH_PRIVATE_CXX14() 201402L
55#define ANKERL_NANOBENCH_PRIVATE_CXX17() 201703L
57#if ANKERL_NANOBENCH(CXX) >= ANKERL_NANOBENCH(CXX17)
58# define ANKERL_NANOBENCH_PRIVATE_NODISCARD() [[nodiscard]]
60# define ANKERL_NANOBENCH_PRIVATE_NODISCARD()
64# define ANKERL_NANOBENCH_PRIVATE_IGNORE_PADDED_PUSH() \
65 _Pragma("clang diagnostic push") _Pragma("clang diagnostic ignored \"-Wpadded\"")
66# define ANKERL_NANOBENCH_PRIVATE_IGNORE_PADDED_POP() _Pragma("clang diagnostic pop")
68# define ANKERL_NANOBENCH_PRIVATE_IGNORE_PADDED_PUSH()
69# define ANKERL_NANOBENCH_PRIVATE_IGNORE_PADDED_POP()
73# define ANKERL_NANOBENCH_PRIVATE_IGNORE_EFFCPP_PUSH() _Pragma("GCC diagnostic push") _Pragma("GCC diagnostic ignored \"-Weffc++\"")
74# define ANKERL_NANOBENCH_PRIVATE_IGNORE_EFFCPP_POP() _Pragma("GCC diagnostic pop")
76# define ANKERL_NANOBENCH_PRIVATE_IGNORE_EFFCPP_PUSH()
77# define ANKERL_NANOBENCH_PRIVATE_IGNORE_EFFCPP_POP()
80#if defined(ANKERL_NANOBENCH_LOG_ENABLED)
82# define ANKERL_NANOBENCH_LOG(x) \
84 std::cout << __FUNCTION__ << "@" << __LINE__ << ": " << x << std::endl; \
87# define ANKERL_NANOBENCH_LOG(x) \
92#define ANKERL_NANOBENCH_PRIVATE_PERF_COUNTERS() 0
93#if defined(__linux__) && !defined(ANKERL_NANOBENCH_DISABLE_PERF_COUNTERS)
94# include <linux/version.h>
95# if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 3, 0)
98# undef ANKERL_NANOBENCH_PRIVATE_PERF_COUNTERS
99# define ANKERL_NANOBENCH_PRIVATE_PERF_COUNTERS() 1
103#if defined(__clang__)
104# define ANKERL_NANOBENCH_NO_SANITIZE(...) __attribute__((no_sanitize(__VA_ARGS__)))
106# define ANKERL_NANOBENCH_NO_SANITIZE(...)
110# define ANKERL_NANOBENCH_PRIVATE_NOINLINE() __declspec(noinline)
112# define ANKERL_NANOBENCH_PRIVATE_NOINLINE() __attribute__((noinline))
117#if defined(__GNUC__) && __GNUC__ < 5
118# define ANKERL_NANOBENCH_IS_TRIVIALLY_COPYABLE(...) __has_trivial_copy(__VA_ARGS__)
120# define ANKERL_NANOBENCH_IS_TRIVIALLY_COPYABLE(...) std::is_trivially_copyable<__VA_ARGS__>::value
// Evaluates to true when std::string's move assignment cannot throw. Queried through
// ANKERL_NANOBENCH(NOEXCEPT_STRING_MOVE), e.g. inside noexcept(...) specifiers.
125#define ANKERL_NANOBENCH_PRIVATE_NOEXCEPT_STRING_MOVE() std::is_nothrow_move_assignable<std::string>::value
// Clock type used for timing: std::chrono::high_resolution_clock when it reports is_steady,
// otherwise std::chrono::steady_clock. Either way the selected clock is a steady one.
132using Clock = std::conditional<std::chrono::high_resolution_clock::is_steady, std::chrono::high_resolution_clock,
133 std::chrono::steady_clock>::type;
316char const*
csv() noexcept;
347char const* json() noexcept;
359#if ANKERL_NANOBENCH(PERF_COUNTERS)
388 std::string mBenchmarkTitle =
"benchmark";
389 std::string mBenchmarkName =
"noname";
390 std::string mUnit =
"op";
392 double mComplexityN = -1.0;
393 size_t mNumEpochs = 11;
394 size_t mClockResolutionMultiple =
static_cast<size_t>(1000);
395 std::chrono::nanoseconds mMaxEpochTime = std::chrono::milliseconds(100);
396 std::chrono::nanoseconds mMinEpochTime = std::chrono::milliseconds(1);
401 std::ostream* mOut =
nullptr;
402 std::chrono::duration<double> mTimeUnit = std::chrono::nanoseconds{1};
403 std::string mTimeUnitName =
"ns";
404 bool mShowPerformanceCounters =
true;
405 bool mIsRelative =
false;
406 std::unordered_map<std::string, std::string> mContext{};
467 std::vector<std::vector<double>> mNameToMeasurements{};
585 inline double uniform01() noexcept;
727 Bench& batch(
T b) noexcept;
932 Bench& complexityN(
T n) noexcept;
992 BigO complexityBigO(
char const*
name, Op op) const;
995 BigO complexityBigO(
std::
string const&
name, Op op) const;
1012 std::vector<Result> mResults{};
1022template <
typename Arg>
1027#if defined(_MSC_VER)
1030template <
typename T>
1038template <
typename T>
1041 asm volatile(
"" : :
"r,m"(val) :
"memory");
1044template <
typename T>
1046# if defined(__clang__)
1048 asm volatile(
"" :
"+r,m"(val) : :
"memory");
1051 asm volatile(
"" :
"+m,r"(val) : :
"memory");
1098#if ANKERL_NANOBENCH(PERF_COUNTERS)
1115 template <
typename Op>
1125 template <
typename Op>
1129 template <
typename Op>
1143 double mNormalizedRootMeanSquare{};
1154namespace nanobench {
1161 return (std::numeric_limits<uint64_t>::max)();
1168 mX =
UINT64_C(15241094284759029579) * mY;
1169 mY =
rotl(mY - x, 27);
1181double Rng::uniform01() noexcept {
1182 auto i = (
UINT64_C(0x3ff) << 52U) | (
operator()() >> 12U);
1186 std::memcpy(&d, &i,
sizeof(
double));
1190template <
typename Container>
1195 auto n = operator()();
1207 return (x <<
k) | (x >> (64U -
k));
1210template <
typename Op>
1215 auto&
pc = detail::performanceCounters();
1219 Clock::time_point
const before = Clock::now();
1223 Clock::time_point
const after = Clock::now();
1233template <
typename Op>
1236 return run(std::forward<Op>(op));
1239template <
typename Op>
1242 return run(std::forward<Op>(op));
1245template <
typename Op>
1250template <
typename Op>
1257template <
typename T>
1259 mConfig.mBatch =
static_cast<double>(b);
1264template <
typename T>
1266 mConfig.mComplexityN =
static_cast<double>(n);
1271template <
typename Arg>
1273 detail::doNotOptimizeAway(std::forward<Arg>(
arg));
1278template <
typename Arg>
1280 detail::doNotOptimizeAway(std::forward<Arg>(
arg));
1285#if defined(_MSC_VER)
1286template <
typename T>
1287void doNotOptimizeAway(
T const& val) {
1297#if defined(ANKERL_NANOBENCH_IMPLEMENT)
1303# include <algorithm>
1313# include <stdexcept>
1315# if defined(__linux__)
1318# if ANKERL_NANOBENCH(PERF_COUNTERS)
1321# include <linux/perf_event.h>
1322# include <sys/ioctl.h>
1323# include <sys/syscall.h>
1329namespace nanobench {
1353namespace nanobench {
1360template <
typename T>
1361inline double d(
T t)
noexcept {
1362 return static_cast<double>(
t);
1364inline double d(Clock::duration
duration)
noexcept {
1365 return std::chrono::duration_cast<std::chrono::duration<double>>(
duration).
count();
1375char const*
csv() noexcept {
1376 return R
"DELIM("title";"name";"unit";"batch";"elapsed";"error %";"instructions";"branches";"branch misses";"total"
1377{{#result}}"{{title}}";"{{name}}";"{{unit}}";{{batch}};{{median(elapsed)}};{{medianAbsolutePercentError(elapsed)}};{{median(instructions)}};{{median(branchinstructions)}};{{median(branchmisses)}};{{sumProduct(iterations, elapsed)}}
1382 return R
"DELIM(<html>
1385 <script src="https://cdn.plot.ly/plotly-latest.min.js"></script>
1389 <div id="myDiv"></div>
1394 y: [{{#measurement}}{{elapsed}}{{^-last}}, {{/last}}{{/measurement}}],
1398 var title = '{{title}}';
1400 data = data.map(a => Object.assign(a, { boxpoints: 'all', pointpos: 0, type: 'box' }));
1401 var layout = { title: { text: title }, showlegend: false, yaxis: { title: 'time per unit', rangemode: 'tozero', autorange: true } }; Plotly.newPlot('myDiv', data, layout, {responsive: true});
1408char const*
pyperf() noexcept {
1415{{#measurement}} {{elapsed}}{{^-last}},
1416{{/last}}{{/measurement}}
1423 "loops": {{sum(iterations)}},
1424 "inner_loops": {{batch}},
1425 "name": "{{title}}",
1432char const* json() noexcept {
1436 "title": "{{title}}",
1440 "complexityN": {{complexityN}},
1441 "epochs": {{epochs}},
1442 "clockResolution": {{clockResolution}},
1443 "clockResolutionMultiple": {{clockResolutionMultiple}},
1444 "maxEpochTime": {{maxEpochTime}},
1445 "minEpochTime": {{minEpochTime}},
1446 "minEpochIterations": {{minEpochIterations}},
1447 "epochIterations": {{epochIterations}},
1448 "warmup": {{warmup}},
1449 "relative": {{relative}},
1450 "median(elapsed)": {{median(elapsed)}},
1451 "medianAbsolutePercentError(elapsed)": {{medianAbsolutePercentError(elapsed)}},
1452 "median(instructions)": {{median(instructions)}},
1453 "medianAbsolutePercentError(instructions)": {{medianAbsolutePercentError(instructions)}},
1454 "median(cpucycles)": {{median(cpucycles)}},
1455 "median(contextswitches)": {{median(contextswitches)}},
1456 "median(pagefaults)": {{median(pagefaults)}},
1457 "median(branchinstructions)": {{median(branchinstructions)}},
1458 "median(branchmisses)": {{median(branchmisses)}},
1459 "totalTime": {{sumProduct(iterations, elapsed)}},
1462 "iterations": {{iterations}},
1463 "elapsed": {{elapsed}},
1464 "pagefaults": {{pagefaults}},
1465 "cpucycles": {{cpucycles}},
1466 "contextswitches": {{contextswitches}},
1467 "instructions": {{instructions}},
1468 "branchinstructions": {{branchinstructions}},
1469 "branchmisses": {{branchmisses}}
1470 }{{^-last}},{{/-last}}
1472 }{{^-last}},{{/-last}}
1483 std::vector<Node> children;
1488 bool operator==(
char const (&str)[
N])
const noexcept {
1490 return static_cast<size_t>(std::distance(begin, end) + 1) ==
N && 0 ==
strncmp(str, begin,
N - 1);
1497 std::vector<Node>
nodes;
1500 auto const* begin = std::strstr(*
tpl,
"{{");
1501 auto const* end = begin;
1502 if (begin !=
nullptr) {
1505 end = std::strstr(begin,
"}}");
1508 if (begin ==
nullptr || end ==
nullptr) {
1511 nodes.emplace_back(Node{*
tpl, *
tpl + std::strlen(*
tpl), std::vector<Node>{}, Node::Type::content});
1516 nodes.emplace_back(Node{*
tpl, begin - 2, std::vector<Node>{}, Node::Type::content});
1537 nodes.emplace_back(Node{begin, end, std::vector<Node>{}, Node::Type::tag});
1543static bool generateFirstLast(Node
const& n,
size_t idx,
size_t size, std::ostream& out) {
1552 if (n.type == Node::Type::section) {
1554 }
else if (n.type == Node::Type::inverted_section) {
1559 for (
auto const&
child : n.children) {
1560 if (
child.type == Node::Type::content) {
1572 if (
idxClose == std::string::npos) {
1581 if (str[i] ==
' ' || str[i] ==
'\t') {
1585 if (str[i] ==
',') {
1596static bool generateConfigTag(Node
const& n, Config
const& config, std::ostream& out) {
1600 out << config.mBenchmarkTitle;
1604 out << config.mBenchmarkName;
1608 out << config.mUnit;
1612 out << config.mBatch;
1615 if (n ==
"complexityN") {
1616 out << config.mComplexityN;
1619 if (n ==
"epochs") {
1620 out << config.mNumEpochs;
1623 if (n ==
"clockResolution") {
1624 out << d(detail::clockResolution());
1627 if (n ==
"clockResolutionMultiple") {
1628 out << config.mClockResolutionMultiple;
1631 if (n ==
"maxEpochTime") {
1632 out << d(config.mMaxEpochTime);
1635 if (n ==
"minEpochTime") {
1636 out << d(config.mMinEpochTime);
1639 if (n ==
"minEpochIterations") {
1640 out << config.mMinEpochIterations;
1643 if (n ==
"epochIterations") {
1644 out << config.mEpochIterations;
1647 if (n ==
"warmup") {
1648 out << config.mWarmup;
1651 if (n ==
"relative") {
1652 out << config.mIsRelative;
1659static std::ostream&
generateResultTag(Node
const& n, Result
const& r, std::ostream& out) {
1676 if (
m == Result::Measure::_size) {
1681 return out << r.median(
m);
1684 return out << r.average(
m);
1686 if (
matchResult[0] ==
"medianAbsolutePercentError") {
1687 return out << r.medianAbsolutePercentError(
m);
1690 return out << r.sum(
m);
1693 return out << r.minimum(
m);
1696 return out << r.maximum(
m);
1701 if (
m1 == Result::Measure::_size ||
m2 == Result::Measure::_size) {
1706 return out << r.sumProduct(
m1,
m2);
1715 throw std::runtime_error(
"command '" + std::string(n.begin, n.end) +
"' not understood");
1719 for (
auto const& n :
nodes) {
1723 case Node::Type::content:
1724 out.write(n.begin, std::distance(n.begin, n.end));
1727 case Node::Type::inverted_section:
1728 throw std::runtime_error(
"got a inverted section inside measurement");
1730 case Node::Type::section:
1731 throw std::runtime_error(
"got a section inside measurement");
1733 case Node::Type::tag: {
1734 auto m = Result::fromString(std::string(n.begin, n.end));
1735 if (
m == Result::Measure::_size || !r.has(
m)) {
1738 out << r.get(idx,
m);
1747static void generateResult(std::vector<Node>
const&
nodes,
size_t idx, std::vector<Result>
const&
results, std::ostream& out) {
1749 for (
auto const& n :
nodes) {
1753 case Node::Type::content:
1754 out.write(n.begin, std::distance(n.begin, n.end));
1757 case Node::Type::inverted_section:
1758 throw std::runtime_error(
"got a inverted section inside result");
1760 case Node::Type::section:
1761 if (n ==
"measurement") {
1762 for (
size_t i = 0; i < r.size(); ++i) {
1766 throw std::runtime_error(
"got a section inside result");
1770 case Node::Type::tag:
1787template <
typename T>
1788T parseFile(std::string
const& filename,
bool* fail);
1804class NumSep :
public std::numpunct<char> {
1835 std::streamsize
const mWidth;
1836 std::ostream::char_type
const mFill;
1850 std::ostream& write(std::ostream&
os)
const;
1885 std::ostream& write(std::ostream&
os)
const;
1887 std::string
mWhat{};
1900namespace nanobench {
1904 detail::fmt::StreamStateRestorer
const restorer(out);
1906 out.precision(std::numeric_limits<double>::digits10);
1909 for (
auto const& n :
nodes) {
1912 case templates::Node::Type::content:
1913 out.write(n.begin, std::distance(n.begin, n.end));
1916 case templates::Node::Type::inverted_section:
1917 throw std::runtime_error(
"unknown list '" + std::string(n.begin, n.end) +
"'");
1919 case templates::Node::Type::section:
1920 if (n ==
"result") {
1922 for (
size_t i = 0; i <
nbResults; ++i) {
1925 }
else if (n ==
"measurement") {
1927 throw std::runtime_error(
1928 "render: can only use section 'measurement' here if there is a single result, but there are " +
1929 detail::fmt::to_s(
results.size()));
1932 auto const& r =
results.front();
1933 for (
size_t i = 0; i < r.size(); ++i) {
1937 throw std::runtime_error(
"render: unknown section '" + std::string(n.begin, n.end) +
"'");
1941 case templates::Node::Type::tag:
1948 throw std::runtime_error(
"unknown tag '" + std::string(n.begin, n.end) +
"'");
1971# if defined(__clang__)
1972# pragma clang diagnostic push
1973# pragma clang diagnostic ignored "-Wexit-time-destructors"
1975 static PerformanceCounters
pc;
1976# if defined(__clang__)
1977# pragma clang diagnostic pop
1986# if defined(_MSC_VER)
1987# pragma optimize("", off)
1989# pragma optimize("", on)
1992template <
typename T>
1993T parseFile(std::string
const& filename,
bool* fail) {
1994 std::ifstream
fin(filename);
1997 if (fail !=
nullptr) {
2004# if defined(_MSC_VER)
2005# pragma warning(push)
2006# pragma warning(disable : 4996)
2008 return std::getenv(
name);
2009# if defined(_MSC_VER)
2010# pragma warning(pop)
2030 warnings.emplace_back(
"DEBUG defined");
2037# if defined(__linux__)
2040 warnings.emplace_back(
"couldn't figure out number of processors - no governor, turbo check possible");
2043 for (
long id = 0;
id <
nprocs; ++id) {
2045 auto sysCpu =
"/sys/devices/system/cpu/cpu" +
idStr;
2051 warnings.emplace_back(
"CPU frequency scaling enabled: CPU " +
idStr +
" between " +
2052 detail::fmt::Number(1, 1,
minMHz).
to_s() +
" and " + detail::fmt::Number(1, 1,
maxMHz).
to_s() +
2062 warnings.emplace_back(
"CPU governor is '" +
currentGovernor +
"' but should be 'performance'");
2068 warnings.emplace_back(
"Turbo is enabled, CPU frequency will fluctuate");
2078 recommendations.emplace_back(
"Use 'pyperf system tune' before benchmarking. See https://github.com/psf/pyperf");
2087 std::vector<std::string> warnings;
2090 if (warnings.empty()) {
2094 os <<
"Warning, results might be unstable:" << std::endl;
2095 for (
auto const&
w : warnings) {
2096 os <<
"* " <<
w << std::endl;
2099 os << std::endl <<
"Recommendations" << std::endl;
2101 os <<
"* " << r << std::endl;
2114 return seed ^ (val +
UINT64_C(0x9e3779b9) + (seed << 6U) + (seed >> 2U));
2120 Clock::time_point
tBegin;
2121 Clock::time_point
tEnd;
2125 tEnd = Clock::now();
2139struct IterationLogic::Impl {
2142 explicit Impl(Bench
const&
bench)
2157 std::cerr <<
"NANOBENCH_ENDLESS set: running '" <<
mBench.name() <<
"' endlessly" << std::endl;
2158 mNumIters = (std::numeric_limits<uint64_t>::max)();
2160 }
else if (0 !=
mBench.warmup()) {
2163 }
else if (0 !=
mBench.epochIterations()) {
2166 mState = State::measuring;
2169 mState = State::upscaling_runtime;
2195 showResult(
"iterations overflow. Maybe your code got optimized away?");
2205 void add(std::chrono::nanoseconds elapsed, PerformanceCounters
const&
pc)
noexcept {
2206# if defined(ANKERL_NANOBENCH_LOG_ENABLED)
2215 mState = State::measuring;
2219 mState = State::upscaling_runtime;
2224 case State::upscaling_runtime:
2227 mState = State::measuring;
2237 case State::measuring:
2243 if (0 !=
mBench.epochIterations()) {
2250 case State::endless:
2251 mNumIters = (std::numeric_limits<uint64_t>::max)();
2270 if (
mBench.output() !=
nullptr) {
2272 std::vector<fmt::MarkDownColumn> columns;
2278 if (!
mBench.results().empty()) {
2279 d =
rMedian <= 0.0 ? 0.0 :
mBench.results().front().median(Result::Measure::elapsed) /
rMedian * 100.0;
2281 columns.emplace_back(11, 1,
"relative",
"%", d);
2284 if (
mBench.complexityN() > 0) {
2285 columns.emplace_back(14, 0,
"complexityN",
"",
mBench.complexityN());
2288 columns.emplace_back(22, 2,
mBench.timeUnitName() +
"/" +
mBench.unit(),
"",
2293 columns.emplace_back(10, 1,
"err%",
"%",
rErrorMedian * 100.0);
2296 if (
mBench.performanceCounters() &&
mResult.has(Result::Measure::instructions)) {
2302 if (
mBench.performanceCounters() &&
mResult.has(Result::Measure::cpucycles)) {
2309 if (
mBench.performanceCounters() &&
mResult.has(Result::Measure::branchinstructions)) {
2312 if (
mResult.has(Result::Measure::branchmisses)) {
2317 columns.emplace_back(10, 1,
"miss%",
"%",
p);
2321 columns.emplace_back(12, 2,
"total",
"",
mResult.sumProduct(Result::Measure::iterations, Result::Measure::elapsed));
2340 for (
auto const&
col : columns) {
2343 os <<
"| " <<
mBench.title() << std::endl;
2345 for (
auto const&
col : columns) {
2346 os <<
col.separator();
2348 os <<
"|:" << std::string(
mBench.title().size() + 1U,
'-') << std::endl;
2352 for (
auto const&
col : columns) {
2353 os <<
col.invalid();
2355 os <<
"| :boom: " << fmt::MarkDownCode(
mBench.name()) <<
" (" <<
errorMessage <<
')' << std::endl;
2357 for (
auto const&
col : columns) {
2363 os <<
":wavy_dash: ";
2365 os << fmt::MarkDownCode(
mBench.name());
2371 os <<
" (Unstable with ~" << detail::fmt::Number(1, 1,
avgIters)
2372 <<
" iters. Increase `minEpochIterations` to e.g. " <<
suggestedIters <<
")";
2394IterationLogic::IterationLogic(Bench
const&
bench)
2397IterationLogic::~IterationLogic() {
2401uint64_t IterationLogic::numIters() const noexcept {
2403 return mPimpl->mNumIters;
2406void IterationLogic::add(std::chrono::nanoseconds elapsed, PerformanceCounters
const&
pc)
noexcept {
2407 mPimpl->add(elapsed,
pc);
2410void IterationLogic::moveResultTo(std::vector<Result>&
results)
noexcept {
2411 results.emplace_back(std::move(mPimpl->mResult));
2414# if ANKERL_NANOBENCH(PERF_COUNTERS)
2438 inline void start() {}
2440 inline void stop() {}
2451 inline void beginMeasure() {
2466 inline void endMeasure() {
2485 template <
typename T>
2498 template <
typename Op>
2509 v = (std::numeric_limits<uint64_t>::max)();
2511 for (
size_t iter = 0; iter < 100; ++iter) {
2543 detail::doNotOptimizeAway(x);
2554 detail::doNotOptimizeAway(x);
2557 for (
size_t i = 0; i <
mCounters.size(); ++i) {
2585LinuxPerformanceCounters::~LinuxPerformanceCounters() {
2595bool LinuxPerformanceCounters::monitor(
perf_hw_id hwId, LinuxPerformanceCounters::Target
target) {
2615 auto idx =
static_cast<size_t>(3 + i * 2 + 0);
2621 auto&
tgt = it->second;
2623 if (
tgt.correctMeasuringOverhead) {
2627 *
tgt.targetValue = 0
U;
2630 if (
tgt.correctLoopOverhead) {
2635 *
tgt.targetValue = 0
U;
2643 *
target.targetValue = (std::numeric_limits<uint64_t>::max)();
2654 pea.exclude_kernel = 1;
2662# if defined(PERF_FLAG_FD_CLOEXEC)
2665 const unsigned long flags = 0;
2696PerformanceCounters::PerformanceCounters()
2703 if (!mHas.cpuCycles) {
2708 mHas.branchInstructions =
2715 mHas.contextSwitches =
2720 auto before = ankerl::nanobench::Clock::now();
2721 auto after = ankerl::nanobench::Clock::now();
2726 if (
mPc->hasError()) {
2728 mHas = PerfCountSet<bool>{};
2732PerformanceCounters::~PerformanceCounters() {
2737void PerformanceCounters::beginMeasure() {
2738 mPc->beginMeasure();
2741void PerformanceCounters::endMeasure() {
// No-op PerformanceCounters members: construction and destruction are defaulted and the
// measurement hooks have empty bodies.
// NOTE(review): presumably the fallback branch used when perf counters are not available;
// the enclosing preprocessor conditional is not visible here - confirm.
2751PerformanceCounters::PerformanceCounters() =
default;
2752PerformanceCounters::~PerformanceCounters() =
default;
2753void PerformanceCounters::beginMeasure() {}
2754void PerformanceCounters::endMeasure() {}
// The parameter is left unnamed because this implementation ignores it.
2755void PerformanceCounters::updateResults(
uint64_t) {}
2770NumSep::NumSep(
char sep)
2773char NumSep::do_thousands_sep()
const {
2777std::string NumSep::do_grouping()
const {
2782StreamStateRestorer::StreamStateRestorer(std::ostream& s)
2790StreamStateRestorer::~StreamStateRestorer() {
2795void StreamStateRestorer::restore() {
2813std::ostream& Number::write(std::ostream&
os)
const {
2815 os.imbue(std::locale(
os.getloc(),
new NumSep(
',')));
2820std::string Number::to_s()
const {
2821 std::stringstream
ss;
2829 str +=
static_cast<char>(
'0' +
static_cast<char>(n % 10));
2832 std::reverse(str.begin(), str.end());
2840MarkDownColumn::MarkDownColumn(
int w,
int prec, std::string
tit, std::string
suff,
double val) noexcept
2847std::string MarkDownColumn::title()
const {
2848 std::stringstream
ss;
2849 ss <<
'|' << std::setw(
mWidth - 2) << std::right <<
mTitle <<
' ';
2853std::string MarkDownColumn::separator()
const {
2854 std::string
sep(
static_cast<size_t>(
mWidth),
'-');
2860std::string MarkDownColumn::invalid()
const {
2861 std::string
sep(
static_cast<size_t>(
mWidth),
' ');
2863 sep[
sep.size() - 2] =
'-';
2867std::string MarkDownColumn::value()
const {
2868 std::stringstream
ss;
2875MarkDownCode::MarkDownCode(std::string
const& what) {
2876 mWhat.reserve(what.size() + 2);
2877 mWhat.push_back(
'`');
2878 for (
char const c : what) {
2881 mWhat.push_back(
'`');
2884 mWhat.push_back(
'`');
2887std::ostream& MarkDownCode::write(std::ostream&
os)
const {
// Out-of-line, compiler-generated (defaulted) special member functions of Config:
// default constructor, destructor, copy assignment, copy constructor and a
// noexcept move constructor.
2898Config::Config() =
default;
2899Config::~Config() =
default;
2900Config& Config::operator=(Config
const&) =
default;
2902Config::Config(Config const&) =
default;
2903Config::Config(Config&&) noexcept =
default;
// Out-of-line, compiler-generated (defaulted) special member functions of Result:
// copy assignment, copy constructor and a noexcept move constructor.
2907Result& Result::operator=(Result const&) =
default;
2909Result::Result(Result const&) =
default;
2910Result::Result(Result&&) noexcept =
default;
2913template <
typename T>
2914inline constexpr typename std::underlying_type<T>::type u(
T val)
noexcept {
2915 return static_cast<typename std::underlying_type<T>::type
>(val);
2922 , mNameToMeasurements{
detail::u(
Result::Measure::_size)} {}
2929 mNameToMeasurements[u(Result::Measure::iterations)].push_back(
dIters);
2931 mNameToMeasurements[u(Result::Measure::elapsed)].push_back(d(
totalElapsed) /
dIters);
2932 if (
pc.has().pageFaults) {
2933 mNameToMeasurements[u(Result::Measure::pagefaults)].push_back(d(
pc.val().pageFaults) /
dIters);
2935 if (
pc.has().cpuCycles) {
2936 mNameToMeasurements[u(Result::Measure::cpucycles)].push_back(d(
pc.val().cpuCycles) /
dIters);
2938 if (
pc.has().contextSwitches) {
2939 mNameToMeasurements[u(Result::Measure::contextswitches)].push_back(d(
pc.val().contextSwitches) /
dIters);
2941 if (
pc.has().instructions) {
2942 mNameToMeasurements[u(Result::Measure::instructions)].push_back(d(
pc.val().instructions) /
dIters);
2944 if (
pc.has().branchInstructions) {
2945 double branchInstructions = 0.0;
2947 if (
pc.val().branchInstructions >
iters + 1U) {
2948 branchInstructions = d(
pc.val().branchInstructions - (
iters + 1U));
2950 mNameToMeasurements[u(Result::Measure::branchinstructions)].push_back(branchInstructions /
dIters);
2952 if (
pc.has().branchMisses) {
2954 double branchMisses = d(
pc.val().branchMisses);
2955 if (branchMisses > branchInstructions) {
2957 branchMisses = branchInstructions;
2961 branchMisses -= 1.0;
2962 if (branchMisses < 1.0) {
2965 mNameToMeasurements[u(Result::Measure::branchmisses)].push_back(branchMisses /
dIters);
2970Config
const& Result::config() const noexcept {
2974inline double calcMedian(std::vector<double>& data) {
2978 std::sort(
data.begin(),
data.end());
2981 if (1U == (
data.size() & 1U)) {
2987double Result::median(Measure
m)
const {
2989 auto data = mNameToMeasurements[detail::u(
m)];
2993double Result::average(Measure
m)
const {
2995 auto const&
data = mNameToMeasurements[detail::u(
m)];
3004double Result::medianAbsolutePercentError(Measure
m)
const {
3006 auto data = mNameToMeasurements[detail::u(
m)];
3013 for (
auto& x :
data) {
3022double Result::sum(Measure
m)
const noexcept {
3023 auto const&
data = mNameToMeasurements[detail::u(
m)];
3024 return std::accumulate(
data.begin(),
data.end(), 0.0);
3027double Result::sumProduct(Measure
m1, Measure
m2)
const noexcept {
3028 auto const&
data1 = mNameToMeasurements[detail::u(
m1)];
3029 auto const&
data2 = mNameToMeasurements[detail::u(
m2)];
3035 double result = 0.0;
3036 for (
size_t i = 0, s =
data1.size(); i !=
s; ++i) {
3042bool Result::has(Measure
m)
const noexcept {
3043 return !mNameToMeasurements[detail::u(
m)].empty();
3046double Result::get(
size_t idx, Measure
m)
const {
3047 auto const&
data = mNameToMeasurements[detail::u(
m)];
3048 return data.at(idx);
3051bool Result::empty() const noexcept {
3052 return 0
U == size();
3055size_t Result::size() const noexcept {
3056 auto const&
data = mNameToMeasurements[detail::u(Measure::elapsed)];
3060double Result::minimum(Measure
m)
const noexcept {
3061 auto const&
data = mNameToMeasurements[detail::u(
m)];
3067 return *std::min_element(
data.begin(),
data.end());
3070double Result::maximum(Measure
m)
const noexcept {
3071 auto const&
data = mNameToMeasurements[detail::u(
m)];
3077 return *std::max_element(
data.begin(),
data.end());
3080std::string
const& Result::context(
char const*
variableName)
const {
3084std::string
const& Result::context(std::string
const&
variableName)
const {
3088Result::Measure Result::fromString(std::string
const& str) {
3089 if (str ==
"elapsed") {
3090 return Measure::elapsed;
3092 if (str ==
"iterations") {
3093 return Measure::iterations;
3095 if (str ==
"pagefaults") {
3096 return Measure::pagefaults;
3098 if (str ==
"cpucycles") {
3099 return Measure::cpucycles;
3101 if (str ==
"contextswitches") {
3102 return Measure::contextswitches;
3104 if (str ==
"instructions") {
3105 return Measure::instructions;
3107 if (str ==
"branchinstructions") {
3108 return Measure::branchinstructions;
3110 if (str ==
"branchmisses") {
3111 return Measure::branchmisses;
3114 return Measure::_size;
3119 mConfig.mOut = &std::cout;
// Out-of-line, compiler-generated (defaulted) special member functions of Bench:
// noexcept move constructor, copy constructor, copy assignment and a noexcept destructor.
3122Bench::Bench(Bench&&) noexcept =
default;
3124Bench::Bench(Bench const&) =
default;
3125Bench& Bench::operator=(Bench const&) =
default;
3126Bench::~Bench() noexcept =
default;
3128double Bench::batch() const noexcept {
3129 return mConfig.mBatch;
3132double Bench::complexityN() const noexcept {
3133 return mConfig.mComplexityN;
3142bool Bench::relative() const noexcept {
3143 return mConfig.mIsRelative;
3150bool Bench::performanceCounters() const noexcept {
3151 return mConfig.mShowPerformanceCounters;
3157Bench& Bench::unit(
char const* u) {
3158 if (u != mConfig.mUnit) {
3165Bench& Bench::unit(std::string
const& u) {
3166 return unit(u.c_str());
3169std::string
const& Bench::unit() const noexcept {
3170 return mConfig.mUnit;
3173Bench& Bench::timeUnit(std::chrono::duration<double>
const&
tu, std::string
const&
tuName) {
3174 mConfig.mTimeUnit =
tu;
3175 mConfig.mTimeUnitName =
tuName;
3179std::string
const& Bench::timeUnitName() const noexcept {
3180 return mConfig.mTimeUnitName;
3183std::chrono::duration<double>
const& Bench::timeUnit() const noexcept {
3184 return mConfig.mTimeUnit;
3203std::string
const& Bench::title() const noexcept {
3204 return mConfig.mBenchmarkTitle;
3217std::string
const& Bench::name() const noexcept {
3218 return mConfig.mBenchmarkName;
3231Bench& Bench::clearContext() {
3232 mConfig.mContext.clear();
3237Bench& Bench::epochs(
size_t numEpochs)
noexcept {
3241size_t Bench::epochs() const noexcept {
3242 return mConfig.mNumEpochs;
3246Bench& Bench::clockResolutionMultiple(
size_t multiple)
noexcept {
3247 mConfig.mClockResolutionMultiple =
multiple;
3250size_t Bench::clockResolutionMultiple() const noexcept {
3251 return mConfig.mClockResolutionMultiple;
3255Bench& Bench::maxEpochTime(std::chrono::nanoseconds
t)
noexcept {
3256 mConfig.mMaxEpochTime =
t;
3259std::chrono::nanoseconds Bench::maxEpochTime() const noexcept {
3260 return mConfig.mMaxEpochTime;
3264Bench& Bench::minEpochTime(std::chrono::nanoseconds
t)
noexcept {
3265 mConfig.mMinEpochTime =
t;
3268std::chrono::nanoseconds Bench::minEpochTime() const noexcept {
3269 return mConfig.mMinEpochTime;
3276uint64_t Bench::minEpochIterations() const noexcept {
3277 return mConfig.mMinEpochIterations;
3281 mConfig.mEpochIterations =
numIters;
3284uint64_t Bench::epochIterations() const noexcept {
3285 return mConfig.mEpochIterations;
3292uint64_t Bench::warmup() const noexcept {
3293 return mConfig.mWarmup;
3300Config
const& Bench::config() const noexcept {
3304Bench& Bench::output(std::ostream*
outstream)
noexcept {
3310 return mConfig.mOut;
3313std::vector<Result>
const& Bench::results() const noexcept {
3327std::vector<BigO> Bench::complexityBigO()
const {
3328 std::vector<BigO>
bigOs;
3329 auto rangeMeasure = BigO::collectRangeMeasure(mResults);
3337 return std::log2(n);
3340 return n * std::log2(n);
3355 std::random_device
rd;
3356 std::uniform_int_distribution<uint64_t>
dist;
3360 }
while (mX == 0 && mY == 0);
3366 z = (z ^ (z >> 30U)) *
UINT64_C(0xbf58476d1ce4e5b9);
3367 z = (z ^ (z >> 27U)) *
UINT64_C(0x94d049bb133111eb);
3368 return z ^ (z >> 31U);
3375 for (
size_t i = 0; i < 10; ++i) {
3385Rng Rng::copy() const noexcept {
3389Rng::Rng(std::vector<uint64_t>
const& data)
3392 if (
data.size() != 2) {
3393 throw std::runtime_error(
"ankerl::nanobench::Rng::Rng: needed exactly 2 entries in data, but got " +
3394 detail::fmt::to_s(
data.size()));
3400std::vector<uint64_t> Rng::state()
const {
3401 std::vector<uint64_t>
data(2);
3407BigO::RangeMeasure BigO::collectRangeMeasure(std::vector<Result>
const&
results) {
3409 for (
auto const& result :
results) {
3410 if (result.config().mComplexityN > 0.0) {
3411 rangeMeasure.emplace_back(result.config().mComplexityN, result.median(Result::Measure::elapsed));
3434 auto diff = mConstant *
rm.first -
rm.second;
3442 mNormalizedRootMeanSquare = std::sqrt(err / n) /
mean;
3448std::string
const& BigO::name() const noexcept {
3452double BigO::constant() const noexcept {
3456double BigO::normalizedRootMeanSquare() const noexcept {
3457 return mNormalizedRootMeanSquare;
3460bool BigO::operator<(BigO
const& other)
const noexcept {
3461 return std::tie(mNormalizedRootMeanSquare, mName) < std::tie(other.mNormalizedRootMeanSquare, other.mName);
3465 return os <<
bigO.constant() <<
" * " <<
bigO.name() <<
", rms=" <<
bigO.normalizedRootMeanSquare();
3468std::ostream&
operator<<(std::ostream&
os, std::vector<ankerl::nanobench::BigO>
const&
bigOs) {
3469 detail::fmt::StreamStateRestorer
const restorer(
os);
3470 os << std::endl <<
"| coefficient | err% | complexity" << std::endl <<
"|--------------:|-------:|------------" << std::endl;
3472 os <<
"|" << std::setw(14) << std::setprecision(7) << std::scientific <<
bigO.constant() <<
" ";
3473 os <<
"|" << detail::fmt::Number(6, 1,
bigO.normalizedRootMeanSquare() * 100.0) <<
"% ";
3474 os <<
"| " <<
bigO.name();
Main entry point to nanobench's benchmarking facility.
Bench & operator=(Bench const &other)
Bench()
Creates a new benchmark for configuration and running of benchmarks.
Bench & operator=(Bench &&other) noexcept(ANKERL_NANOBENCH(NOEXCEPT_STRING_MOVE))
Bench(Bench &&other) noexcept
Bench(Bench const &other)
static RangeMeasure mapRangeMeasure(RangeMeasure data, Op op)
BigO(std::string bigOName, RangeMeasure const &scaledRangeMeasure)
BigO(char const *bigOName, RangeMeasure const &rangeMeasure, Op rangeToN)
static RangeMeasure collectRangeMeasure(std::vector< Result > const &results)
BigO(std::string bigOName, RangeMeasure const &rangeMeasure, Op rangeToN)
std::vector< std::pair< double, double > > RangeMeasure
BigO(char const *bigOName, RangeMeasure const &scaledRangeMeasure)
Result(Config benchmarkConfig)
void add(Clock::duration totalElapsed, uint64_t iters, detail::PerformanceCounters const &pc)
Result(Result &&other) noexcept
ANKERL_NANOBENCH(NODISCARD) Config const &config() const noexcept
Result & operator=(Result const &other)
Result(Result const &other)
Result & operator=(Result &&other) noexcept(ANKERL_NANOBENCH(NOEXCEPT_STRING_MOVE))
An extremely fast random generator.
Rng(Rng const &)=delete
As a safety precaution, we don't allow copying.
Rng(Rng &&) noexcept=default
Rng & operator=(Rng const &)=delete
Same as Rng(Rng const&), we don't allow assignment.
uint64_t result_type
This RNG provides 64bit randomness.
IterationLogic(IterationLogic &&)=delete
IterationLogic & operator=(IterationLogic const &)=delete
ANKERL_NANOBENCH(NODISCARD) uint64_t numIters() const noexcept
IterationLogic(IterationLogic const &)=delete
IterationLogic(Bench const &bench)
IterationLogic & operator=(IterationLogic &&)=delete
static OutputStream & operator<<(OutputStream &os, const Array &array)
#define T(expected, seed, data)
PerformanceCounters & performanceCounters()
void doNotOptimizeAway(T const &val)
char const * csv() noexcept
CSV data for the benchmark results.
char const * pyperf() noexcept
Output in pyperf-compatible JSON format, which can be used for further analysis.
char const * htmlBoxplot() noexcept
HTML output that uses plotly to generate an interactive boxplot chart. See the tutorial for an example.
void render(char const *mustacheTemplate, Bench const &bench, std::ostream &out)
Renders output from a mustache-like template and benchmark results.
std::conditional< std::chrono::high_resolution_clock::is_steady, std::chrono::high_resolution_clock, std::chrono::steady_clock >::type Clock
std::ostream & operator<<(std::ostream &os, BigO const &bigO)
void doNotOptimizeAway(Arg &&arg)
Makes sure none of the given arguments are optimized away by the compiler.
#define ANKERL_NANOBENCH_LOG(x)
#define ANKERL_NANOBENCH_NO_SANITIZE(...)
#define ANKERL_NANOBENCH(x)
bool operator==(const CNetAddr &a, const CNetAddr &b)
Config & operator=(Config const &other)
Config(Config const &other)
Config & operator=(Config &&other) noexcept(ANKERL_NANOBENCH(NOEXCEPT_STRING_MOVE))
Config(Config &&other) noexcept
static SECP256K1_INLINE uint64_t rotl(const uint64_t x, int k)
constexpr auto Ticks(Dur2 d)
Helper to count the seconds of a duration/time_point.