Serenity Operating System
1/*
2 * Copyright (c) 2021, Tobias Christiansen <tobyase@serenityos.org>
3 *
4 * SPDX-License-Identifier: BSD-2-Clause
5 */
6
7#pragma once
8
9#include <AK/Concepts.h>
10#include <AK/Math.h>
11#include <AK/QuickSelect.h>
12#include <AK/QuickSort.h>
13#include <AK/Vector.h>
14
15namespace AK {
16
17static constexpr int ODD_NAIVE_MEDIAN_CUTOFF = 200;
18static constexpr int EVEN_NAIVE_MEDIAN_CUTOFF = 350;
19
20template<Arithmetic T = float>
21class Statistics {
22public:
23 Statistics() = default;
24 ~Statistics() = default;
25
26 void add(T const& value)
27 {
28 // FIXME: Check for an overflow
29 m_sum += value;
30 m_values.append(value);
31 }
32
33 T const sum() const { return m_sum; }
34
35 // FIXME: Unclear Wording, average can mean a lot of different things
36 // Median, Arithmetic Mean (which this is), Geometric Mean, Harmonic Mean etc
37 float average() const
38 {
39 // Let's assume the average of an empty dataset is 0
40 if (size() == 0)
41 return 0;
42
43 // TODO: sum might overflow so maybe do multiple partial sums and intermediate divisions here
44 return (float)sum() / size();
45 }
46
47 T const min() const
48 {
49 // Lets Rather fail than read over the end of a collection
50 VERIFY(size() != 0);
51
52 T minimum = m_values[0];
53 for (T number : values()) {
54 if (number < minimum) {
55 minimum = number;
56 }
57 }
58 return minimum;
59 }
60
61 T const max() const
62 {
63 // Lets Rather fail than read over the end of a collection
64 VERIFY(size() != 0);
65
66 T maximum = m_values[0];
67 for (T number : values()) {
68 if (number > maximum) {
69 maximum = number;
70 }
71 }
72 return maximum;
73 }
74
75 T const median()
76 {
77 // Let's assume the Median of an empty dataset is 0
78 if (size() == 0)
79 return 0;
80
81 // If the number of values is even, the median is the arithmetic mean of the two middle values
82 if (size() <= EVEN_NAIVE_MEDIAN_CUTOFF && size() % 2 == 0) {
83 quick_sort(m_values);
84 return (m_values.at(size() / 2) + m_values.at(size() / 2 - 1)) / 2;
85 } else if (size() <= ODD_NAIVE_MEDIAN_CUTOFF && size() % 2 == 1) {
86 quick_sort(m_values);
87 return m_values.at(m_values.size() / 2);
88 } else if (size() % 2 == 0) {
89 auto index = size() / 2;
90 auto median1 = m_values.at(AK::quickselect_inplace(m_values, index));
91 auto median2 = m_values.at(AK::quickselect_inplace(m_values, index - 1));
92 return (median1 + median2) / 2;
93 }
94 return m_values.at(AK::quickselect_inplace(m_values, size() / 2));
95 }
96
97 float standard_deviation() const { return sqrt(variance()); }
98 float variance() const
99 {
100 float summation = 0;
101 float avg = average();
102 for (T number : values()) {
103 float difference = (float)number - avg;
104 summation += (difference * difference);
105 }
106 summation = summation / size();
107 return summation;
108 }
109
110 Vector<T> const& values() const { return m_values; }
111 size_t size() const { return m_values.size(); }
112
113private:
114 Vector<T> m_values;
115 T m_sum {};
116};
117
118}