Monero
Loading...
Searching...
No Matches
stats.inl
Go to the documentation of this file.
1#include <math.h>
2#include <limits>
3#include <algorithm>
4#include "misc_language.h"
5#include "stats.h"
6
// Bit positions inside Stats::cached; each one flags that the matching
// lazily computed statistic already holds a valid value.
enum
{
  bit_min = 0,
  bit_max,
  bit_median,
  bit_mean,
  bit_standard_deviation,
  bit_standard_error,
  bit_variance,
  bit_kurtosis,
};
18
// Returns x multiplied by itself.
static inline double square(double x)
{
  const double squared = x * x;
  return squared;
}
23
// Linearly interpolates along the line through (v0, i0) and (v1, i1) and
// returns the value of that line at v. A v outside [v0, v1] extrapolates.
// v0 and v1 must differ, otherwise the slope is undefined.
template<typename T>
static inline double interpolate(T v, T v0, double i0, T v1, double i1)
{
  // Form differences as signed doubles: with an unsigned T (callers pass
  // size_t) a plain "a - b" wraps to a huge positive value whenever a < b.
  const auto delta = [](T a, T b) -> double {
    return a < b ? -static_cast<double>(b - a) : static_cast<double>(a - b);
  };
  return i0 + (i1 - i0) * delta(v, v0) / delta(v1, v0);
}
29
30template<typename T, typename Tpod>
31inline bool Stats<T, Tpod>::is_cached(int bit) const
32{
33 return cached & (1<<bit);
34}
35
36template<typename T, typename Tpod>
37inline void Stats<T, Tpod>::set_cached(int bit) const
38{
39 cached |= 1<<bit;
40}
41
42template<typename T, typename Tpod>
44{
45 return values.size();
46}
47
48template<typename T, typename Tpod>
50{
51 if (!is_cached(bit_min))
52 {
53 min = std::numeric_limits<Tpod>::max();
54 for (const T &v: values)
55 min = std::min<Tpod>(min, v);
57 }
58 return min;
59}
60
61template<typename T, typename Tpod>
63{
64 if (!is_cached(bit_max))
65 {
66 max = std::numeric_limits<Tpod>::min();
67 for (const T &v: values)
68 max = std::max<Tpod>(max, v);
70 }
71 return max;
72}
73
74template<typename T, typename Tpod>
76{
78 {
79 std::vector<Tpod> sorted;
80 sorted.reserve(values.size());
81 for (const T &v: values)
82 sorted.push_back(v);
83 std::sort(sorted.begin(), sorted.end());
84 if (sorted.size() & 1)
85 {
86 median = sorted[sorted.size() / 2];
87 }
88 else
89 {
90 median = epee::misc_utils::get_mid(sorted[(sorted.size() - 1) / 2], sorted[sorted.size() / 2]);
91 }
93 }
94 return median;
95}
96
97template<typename T, typename Tpod>
99{
100 if (values.empty())
101 return 0.0;
102 if (!is_cached(bit_mean))
103 {
104 mean = 0.0;
105 for (const T &v: values)
106 mean += v;
107 mean /= values.size();
109 }
110 return mean;
111}
112
113template<typename T, typename Tpod>
114double Stats<T, Tpod>::get_cdf95(size_t df) const
115{
116 static const double p[101] = {
117 -1, 12.706, 4.3027, 3.1824, 2.7765, 2.5706, 2.4469, 2.3646, 2.3060, 2.2622, 2.2281, 2.2010, 2.1788, 2.1604, 2.1448, 2.1315,
118 2.1199, 2.1098, 2.1009, 2.0930, 2.0860, 2.0796, 2.0739, 2.0687, 2.0639, 2.0595, 2.0555, 2.0518, 2.0484, 2.0452, 2.0423, 2.0395,
119 2.0369, 2.0345, 2.0322, 2.0301, 2.0281, 2.0262, 2.0244, 2.0227, 2.0211, 2.0195, 2.0181, 2.0167, 2.0154, 2.0141, 2.0129, 2.0117,
120 2.0106, 2.0096, 2.0086, 2.0076, 2.0066, 2.0057, 2.0049, 2.0040, 2.0032, 2.0025, 2.0017, 2.0010, 2.0003, 1.9996, 1.9990, 1.9983,
121 1.9977, 1.9971, 1.9966, 1.9960, 1.9955, 1.9949, 1.9944, 1.9939, 1.9935, 1.9930, 1.9925, 1.9921, 1.9917, 1.9913, 1.9908, 1.9905,
122 1.9901, 1.9897, 1.9893, 1.9890, 1.9886, 1.9883, 1.9879, 1.9876, 1.9873, 1.9870, 1.9867, 1.9864, 1.9861, 1.9858, 1.9855, 1.9852,
123 1.9850, 1.9847, 1.9845, 1.9842, 1.9840,
124 };
125 if (df <= 100)
126 return p[df];
127 if (df <= 120)
128 return interpolate<size_t>(df, 100, 1.9840, 120, 1.98);
129 return 1.96;
130}
131
132template<typename T, typename Tpod>
133double Stats<T, Tpod>::get_cdf95(const Stats<T> &other) const
134{
135 return get_cdf95(get_size() + other.get_size() - 2);
136}
137
138template<typename T, typename Tpod>
139double Stats<T, Tpod>::get_cdf99(size_t df) const
140{
141 static const double p[101] = {
142 -1, 9.9250, 5.8408, 4.6041, 4.0321, 3.7074, 3.4995, 3.3554, 3.2498, 3.1693, 3.1058, 3.0545, 3.0123, 2.9768, 2.9467, 2.9208, 2.8982,
143 2.8784, 2.8609, 2.8453, 2.8314, 2.8188, 2.8073, 2.7970, 2.7874, 2.7787, 2.7707, 2.7633, 2.7564, 2.7500, 2.7440, 2.7385, 2.7333,
144 2.7284, 2.7238, 2.7195, 2.7154, 2.7116, 2.7079, 2.7045, 2.7012, 2.6981, 2.6951, 2.6923, 2.6896, 2.6870, 2.6846, 2.6822, 2.6800,
145 2.6778, 2.6757, 2.6737, 2.6718, 2.6700, 2.6682, 2.6665, 2.6649, 2.6633, 2.6618, 2.6603, 2.6589, 2.6575, 2.6561, 2.6549, 2.6536,
146 2.6524, 2.6512, 2.6501, 2.6490, 2.6479, 2.6469, 2.6458, 2.6449, 2.6439, 2.6430, 2.6421, 2.6412, 2.6403, 2.6395, 2.6387, 2.6379,
147 2.6371, 2.6364, 2.6356, 2.6349, 2.6342, 2.6335, 2.6329, 2.6322, 2.6316, 2.6309, 2.6303, 2.6297, 2.6291, 2.6286, 2.6280, 2.6275,
148 2.6269, 2.6264, 2.6259,
149 };
150 if (df <= 100)
151 return p[df];
152 if (df <= 120)
153 return interpolate<size_t>(df, 100, 2.6529, 120, 2.617);
154 return 2.576;
155}
156
157template<typename T, typename Tpod>
158double Stats<T, Tpod>::get_cdf99(const Stats<T> &other) const
159{
160 return get_cdf99(get_size() + other.get_size() - 2);
161}
162
163template<typename T, typename Tpod>
165{
166 const size_t df = get_size() - 1;
167 return get_standard_error() * get_cdf95(df);
168}
169
170template<typename T, typename Tpod>
172{
173 const size_t df = get_size() - 1;
174 return get_standard_error() * get_cdf99(df);
175}
176
177template<typename T, typename Tpod>
178bool Stats<T, Tpod>::is_same_distribution_95(size_t npoints, double mean, double stddev) const
179{
180 return fabs(get_t_test(npoints, mean, stddev)) < get_cdf95(get_size() + npoints - 2);
181}
182
183template<typename T, typename Tpod>
185{
186 return fabs(get_t_test(other)) < get_cdf95(other);
187}
188
189template<typename T, typename Tpod>
190bool Stats<T, Tpod>::is_same_distribution_99(size_t npoints, double mean, double stddev) const
191{
192 return fabs(get_t_test(npoints, mean, stddev)) < get_cdf99(get_size() + npoints - 2);
193}
194
195template<typename T, typename Tpod>
197{
198 return fabs(get_t_test(other)) < get_cdf99(other);
199}
200
201template<typename T, typename Tpod>
203{
204 if (values.size() <= 1)
205 return 0.0;
207 {
208 Tpod m = get_mean(), t = 0;
209 for (const T &v: values)
210 t += ((T)v - m) * ((T)v - m);
211 standard_deviation = sqrt(t / ((double)values.size() - 1));
213 }
214 return standard_deviation;
215}
216
217template<typename T, typename Tpod>
227
228template<typename T, typename Tpod>
230{
232 {
233 double stddev = get_standard_deviation();
234 variance = stddev * stddev;
236 }
237 return variance;
238}
239
240template<typename T, typename Tpod>
242{
243 if (values.empty())
244 return 0.0;
246 {
247 double m = get_mean();
248 double n = 0, d = 0;
249 for (const T &v: values)
250 {
251 T p2 = (v - m) * (v - m);
252 T p4 = p2 * p2;
253 n += p4;
254 d += p2;
255 }
256 n /= values.size();
257 d /= values.size();
258 d *= d;
259 kurtosis = n / d;
261 }
262 return kurtosis;
263}
264
265template<typename T, typename Tpod>
270
271template<typename T, typename Tpod>
273{
274 const double n = get_mean() - t;
275 const double d = get_standard_deviation() / sqrt(get_size());
276 return n / d;
277}
278
279template<typename T, typename Tpod>
280double Stats<T, Tpod>::get_t_test(size_t npoints, double mean, double stddev) const
281{
282 const double n = get_mean() - mean;
283 const double d = sqrt(get_variance() / get_size() + square(stddev) / npoints);
284 return n / d;
285}
286
287template<typename T, typename Tpod>
288double Stats<T, Tpod>::get_t_test(const Stats<T> &other) const
289{
290 const double n = get_mean() - other.get_mean();
291 const double d = sqrt(get_variance() / get_size() + other.get_variance() / other.get_size());
292 return n / d;
293}
294
295template<typename T, typename Tpod>
296double Stats<T, Tpod>::get_z_test(const Stats<T> &other) const
297{
298 const double m0 = get_mean();
299 const double m1 = other.get_mean();
300 const double sd0 = get_standard_deviation();
301 const double sd1 = other.get_standard_deviation();
302 const size_t s0 = get_size();
303 const size_t s1 = other.get_size();
304
305 const double n = m0 - m1;
306 const double d = sqrt(square(sd0 / sqrt(s0)) + square(sd1 / sqrt(s1)));
307
308 return n / d;
309}
310
311template<typename T, typename Tpod>
312double Stats<T, Tpod>::get_test(const Stats<T> &other) const
313{
314 if (get_size() >= 30 && other.get_size() >= 30)
315 return get_z_test(other);
316 else
317 return get_t_test(other);
318}
319
320template<typename T, typename Tpod>
321std::vector<Tpod> Stats<T, Tpod>::get_quantiles(unsigned int quantiles) const
322{
323 std::vector<Tpod> sorted;
324 sorted.reserve(values.size());
325 for (const T &v: values)
326 sorted.push_back(v);
327 std::sort(sorted.begin(), sorted.end());
328 std::vector<Tpod> q(quantiles + 1, 0);
329 for (unsigned int i = 1; i <= quantiles; ++i)
330 {
331 unsigned idx = (unsigned)ceil(values.size() * i / (double)quantiles);
332 q[i] = sorted[idx - 1];
333 }
334 if (!is_cached(bit_min))
335 {
336 min = sorted.front();
338 }
339 q[0] = min;
340 if (!is_cached(bit_max))
341 {
342 max = sorted.back();
344 }
345 return q;
346}
347
348template<typename T, typename Tpod>
349std::vector<size_t> Stats<T, Tpod>::get_bins(unsigned int bins) const
350{
351 std::vector<size_t> b(bins, 0);
352 const double scale = 1.0 / (get_max() - get_min());
353 const T base = get_min();
354 for (const T &v: values)
355 {
356 unsigned int idx = (v - base) * scale;
357 ++b[idx];
358 }
359 return b;
360}
#define v0(p)
Definition aesb.c:116
#define v1(p)
Definition aesb.c:117
cryptonote::block b
Definition block.cpp:40
double get_confidence_interval_95() const
Definition stats.inl:164
void set_cached(int bit) const
Definition stats.inl:37
double get_variance() const
Definition stats.inl:229
double get_standard_deviation() const
Definition stats.inl:202
size_t get_size() const
Definition stats.inl:43
double kurtosis
Definition stats.h:55
bool is_same_distribution_99(size_t npoints, double mean, double stddev) const
Definition stats.inl:190
Tpod median
Definition stats.h:50
double standard_deviation
Definition stats.h:52
const std::vector< T > & values
Definition stats.h:45
double variance
Definition stats.h:54
double get_kurtosis() const
Definition stats.inl:241
double get_t_test(T t) const
Definition stats.inl:272
bool is_same_distribution_95(size_t npoints, double mean, double stddev) const
Definition stats.inl:178
double get_z_test(const Stats< T > &other) const
Definition stats.inl:296
Tpod max
Definition stats.h:49
double get_mean() const
Definition stats.inl:98
double get_cdf99(size_t df) const
Definition stats.inl:139
Tpod min
Definition stats.h:48
double mean
Definition stats.h:51
Stats(const std::vector< T > &v)
Definition stats.h:9
double standard_error
Definition stats.h:53
double get_test(const Stats< T > &other) const
Definition stats.inl:312
double get_confidence_interval_99() const
Definition stats.inl:171
double get_cdf95(size_t df) const
Definition stats.inl:114
uint64_t cached
Definition stats.h:47
Tpod get_min() const
Definition stats.inl:49
bool is_cached(int bit) const
Definition stats.inl:31
double get_non_parametric_skew() const
Definition stats.inl:266
std::vector< size_t > get_bins(unsigned int bins) const
Definition stats.inl:349
std::vector< Tpod > get_quantiles(unsigned int quantiles) const
Definition stats.inl:321
double get_standard_error() const
Definition stats.inl:218
Tpod get_median() const
Definition stats.inl:75
Tpod get_max() const
Definition stats.inl:62
Definition base.py:1
Definition d.py:1
T get_mid(const T &a, const T &b)
Definition misc_language.h:43
static double interpolate(T v, T v0, double i0, T v1, double i1)
Definition stats.inl:25
static double square(double x)
Definition stats.inl:19
@ bit_standard_error
Definition stats.inl:14
@ bit_median
Definition stats.inl:11
@ bit_max
Definition stats.inl:10
@ bit_standard_deviation
Definition stats.inl:13
@ bit_min
Definition stats.inl:9
@ bit_kurtosis
Definition stats.inl:16
@ bit_mean
Definition stats.inl:12
@ bit_variance
Definition stats.inl:15
#define T(x)