General Utility Library for C++17 25.4.1
statistics.h
Go to the documentation of this file.
1
23#ifndef GUL17_STATISTICS_H_
24#define GUL17_STATISTICS_H_
25
26#include <algorithm>
27#include <cmath>
28#include <limits>
29#include <numeric>
30#include <type_traits>
31#include <vector>
32
33#include "gul17/internal.h"
34#include "gul17/traits.h"
35
36namespace gul17 {
37
45
58template <typename ElementT>
60{
61 return [](ElementT const& el) -> ElementT const&
62 { return el; };
63}
64
77struct MinMax {
78 DataT min{ std::numeric_limits<DataT>::max() };
79 DataT max{ std::numeric_limits<DataT>::lowest() };
80};
81
82template <typename DataT>
83struct MinMax<DataT, std::enable_if_t<std::is_floating_point<DataT>::value>> {
84 DataT min{ NAN };
85 DataT max{ NAN };
86};
87
106public:
109
122 operator DataT() const noexcept {
123 return sigma_;
124 }
127 return sigma_;
128 }
131 return mean_;
132 }
133};
134
136
160template <typename ResultT = statistics_result_type,
161 typename ContainerT,
162 typename ElementT = typename ContainerT::value_type,
163 typename Accessor = std::result_of_t<decltype(ElementAccessor<ElementT>())(ElementT)>(*)(ElementT const&),
164 typename DataT = typename std::decay_t<std::result_of_t<Accessor(ElementT)>>,
165 typename = std::enable_if_t<IsContainerLike<ContainerT>::value>
166 >
168{
169 auto const sum = std::accumulate(
170 container.cbegin(), container.cend(),
171 ResultT{ },
172 [accessor] (ResultT const& accu, ElementT const& el) {
173 return accu + static_cast<ResultT>(accessor(el)); } );
174 return sum / static_cast<ResultT>(container.size());
175}
176
198template <typename ResultT = statistics_result_type,
199 typename ContainerT,
200 typename ElementT = typename ContainerT::value_type,
201 typename Accessor = std::result_of_t<decltype(ElementAccessor<ElementT>())(ElementT)>(*)(ElementT const&),
202 typename DataT = typename std::decay_t<std::result_of_t<Accessor(ElementT)>>,
203 typename = std::enable_if_t<IsContainerLike<ContainerT>::value>
204 >
206{
207 auto const sum = std::accumulate(
208 container.cbegin(), container.cend(),
209 ResultT{ },
210 [accessor] (ResultT const& accu, ElementT const& el) {
211 return accu + std::pow(static_cast<ResultT>(accessor(el)), 2); } );
212 return std::sqrt(sum / static_cast<ResultT>(container.size()));
213}
214
239template <typename ResultT = statistics_result_type,
240 typename ContainerT,
241 typename ElementT = typename ContainerT::value_type,
242 typename Accessor = std::result_of_t<decltype(ElementAccessor<ElementT>())(ElementT)>(*)(ElementT const&),
243 typename DataT = typename std::decay_t<std::result_of_t<Accessor(ElementT)>>,
244 typename = std::enable_if_t<IsContainerLike<ContainerT>::value>
245 >
247{
248 auto const len = container.size();
249 if (len == 0)
250 return std::numeric_limits<ResultT>::quiet_NaN();
251
252 // work with a copy of the data
253 // because nth_element() partially sorts the input data
254 auto data_copy = std::vector<DataT>{ };
255 data_copy.resize(len);
256 auto data_copy_it = data_copy.begin();
257 auto data_it = container.cbegin();
258 auto const data_end = container.cend();
259 for (; data_it != data_end; ++data_it) {
261 }
262
263 // What is the middle element?
264 auto middle = data_copy.begin() + (len / 2);
265 std::nth_element(data_copy.begin(), middle, data_copy.end());
266 auto median = static_cast<ResultT>(*middle);
267
268 // If we have an even number of elements we need to do more:
269 // We calculate the mean value of the two 'middle' elements
270 if (0 == len % 2) {
271 std::nth_element(data_copy.begin(), middle - 1, data_copy.end());
272 median = (median / 2) + (static_cast<ResultT>(*(middle - 1)) / 2);
273 }
274
275 return median;
276}
277
308template <typename ContainerT,
309 typename ElementT = typename ContainerT::value_type,
310 typename Accessor = std::result_of_t<decltype(ElementAccessor<ElementT>())(ElementT)>(*)(ElementT const&),
311 typename DataT = typename std::decay_t<std::result_of_t<Accessor(ElementT)>>,
312 typename = std::enable_if_t<IsContainerLike<ContainerT>::value>
313>
315{
316 constexpr auto initial_value = std::numeric_limits<DataT>::has_quiet_NaN ?
317 std::numeric_limits<DataT>::quiet_NaN() : std::numeric_limits<DataT>::lowest();
318
319 return std::accumulate(
320 container.cbegin(), container.cend(), initial_value,
321 [&accessor](DataT const& accu, ElementT const& el) -> DataT {
322 auto const val = accessor(el);
323 // Test portably for not-NAN (some compilers do not have std::isnan() for
324 // integral types)
325 if (val == val) {
326 if (not (val <= accu)) // inverted logic to handle NAN correctly
327 return val;
328 }
329 return accu; });
330}
331
362template <typename ContainerT,
363 typename ElementT = typename ContainerT::value_type,
364 typename Accessor = std::result_of_t<decltype(ElementAccessor<ElementT>())(ElementT)>(*)(ElementT const&),
365 typename DataT = typename std::decay_t<std::result_of_t<Accessor(ElementT)>>,
366 typename = std::enable_if_t<IsContainerLike<ContainerT>::value>
367>
368auto minimum(ContainerT const& container, Accessor accessor = ElementAccessor<ElementT>()) -> DataT
369{
370 constexpr auto initial_value = std::numeric_limits<DataT>::has_quiet_NaN ?
371 std::numeric_limits<DataT>::quiet_NaN() : std::numeric_limits<DataT>::max();
372
373 return std::accumulate(
374 container.cbegin(), container.cend(), initial_value,
375 [&accessor](DataT const& accu, ElementT const& el) -> DataT {
376 auto const val = accessor(el);
377 // Test portably for not-NAN (some compilers do not have std::isnan() for
378 // integral types)
379 if (val == val) {
380 if (not (val >= accu)) // inverted logic to handle NAN correctly
381 return val;
382 }
383 return accu; });
384}
385
421template <typename ContainerT,
422 typename ElementT = typename ContainerT::value_type,
423 typename Accessor = std::result_of_t<decltype(ElementAccessor<ElementT>())(ElementT)>(*)(ElementT const&),
424 typename DataT = typename std::decay_t<std::result_of_t<Accessor(ElementT)>>,
425 typename = std::enable_if_t<IsContainerLike<ContainerT>::value>
426 >
427auto min_max(ContainerT const& container, Accessor accessor = ElementAccessor<ElementT>()) -> MinMax<DataT>
428{
429 using MinMaxT = MinMax<DataT>;
430 auto const sum = std::accumulate(
431 container.cbegin(), container.cend(),
432 MinMaxT{ },
433 [accessor] (MinMaxT const& accu, ElementT const& el) -> MinMaxT {
434 auto out{ accu };
435 auto const val = accessor(el);
436 // Test portably for not-NAN (some compilers do not have std::isnan() for
437 // integral types)
438 if (val == val) {
439 // (a >= NAN) and (a <= NAN) always false for all a
440 if (not (val >= out.min))
441 out.min = val;
442 if (not (val <= out.max))
443 out.max = val;
444 }
445 return out; });
446 return sum;
447}
448
469template <typename ContainerT,
470 typename ElementT = typename ContainerT::value_type,
471 typename Accessor = std::result_of_t<decltype(ElementAccessor<ElementT>())(ElementT)>(*)(ElementT const&),
472 typename DataT = typename std::decay_t<std::result_of_t<Accessor(ElementT)>>,
473 typename = std::enable_if_t<IsContainerLike<ContainerT>::value>
474 >
475auto remove_outliers(ContainerT&& cont, std::size_t outliers,
476 Accessor accessor = ElementAccessor<ElementT>()) -> ContainerT&
477{
478 while (outliers-- > 0 and cont.size() > 0) {
479 auto max_distant = std::max_element(cont.begin(), cont.end(),
480 [mean = mean(cont, accessor), accessor] (ElementT const& a, ElementT const& b)
481 { return std::abs(accessor(a) - mean) < std::abs(accessor(b) - mean); });
482 cont.erase(max_distant);
483 }
484 return cont;
485}
486
493template <typename ContainerT,
494 typename ElementT = typename ContainerT::value_type,
495 typename Accessor = std::result_of_t<decltype(ElementAccessor<ElementT>())(ElementT)>(*)(ElementT const&),
496 typename DataT = typename std::decay_t<std::result_of_t<Accessor(ElementT)>>,
497 typename = std::enable_if_t<IsContainerLike<ContainerT>::value>
498 >
499auto remove_outliers(ContainerT const& cont, std::size_t outliers,
500 Accessor accessor = ElementAccessor<ElementT>()) -> std::vector<ElementT>
501{
502 auto c = std::vector<ElementT>(cont.size());
503 std::copy(cont.cbegin(), cont.cend(), c.begin());
504 return remove_outliers(std::move(c), outliers, accessor);
505}
506
545template <typename ResultT = statistics_result_type,
546 typename ContainerT,
547 typename ElementT = typename ContainerT::value_type,
548 typename Accessor = std::result_of_t<decltype(ElementAccessor<ElementT>())(ElementT)>(*)(ElementT const&),
549 typename DataT = typename std::decay_t<std::result_of_t<Accessor(ElementT)>>,
550 typename = std::enable_if_t<IsContainerLike<ContainerT>::value>
551 >
552auto standard_deviation(ContainerT const& container, Accessor accessor = ElementAccessor<ElementT>()) -> StandardDeviationMean<ResultT>
553{
554 auto const len = container.size();
555
556 if (len == 0)
557 return { };
558
559 auto mean_val = mean<ResultT>(container, accessor);
560
561 if (len == 1)
562 return { std::numeric_limits<ResultT>::quiet_NaN(), mean_val };
563
564 auto sum = std::accumulate(container.cbegin(), container.cend(),
565 ResultT{ },
566 [mean_val, accessor] (ResultT const& accu, ElementT const& el)
567 { return accu + std::pow(static_cast<ResultT>(accessor(el)) - mean_val, 2); });
568
569 sum /= static_cast<ResultT>(container.size() - 1);
570
571 return { std::sqrt(sum), mean_val };
572}
573
598template <typename ResultT = statistics_result_type,
599 typename ContainerT,
600 typename ElementT = typename ContainerT::value_type,
601 typename Accessor = std::result_of_t<decltype(ElementAccessor<ElementT>())(ElementT)>(*)(ElementT const&),
602 typename DataT = typename std::decay_t<std::result_of_t<Accessor(ElementT)>>,
603 typename OpClosure,
604 typename = std::enable_if_t<IsContainerLike<ContainerT>::value>
605 >
606auto accumulate(ContainerT const& container, OpClosure op, Accessor accessor = ElementAccessor<ElementT>()) -> ResultT
607{
608 auto const sum = std::accumulate(
609 container.cbegin(), container.cend(),
610 ResultT{ },
611 [accessor, op] (ResultT const& accu, ElementT const& el) {
612 return op(accu, accessor(el)); } );
613 return sum;
614}
615
616namespace {
617
618// The following stuff is only there to have a two iterator interface
619
620 template <typename IteratorT>
621 struct ContainerView {
622 IteratorT const& begin_;
623 IteratorT const& end_;
624 using value_type = std::decay_t<decltype(*begin_)>;
625
626 ContainerView(IteratorT const& i1, IteratorT const& i2)
627 : begin_{ i1 }
628 , end_{ i2 }
629 {
630 }
631
632 // Just implement the member functions that we use here
633
634 auto cbegin() const noexcept -> IteratorT const&
635 {
636 return begin_;
637 }
638 auto cend() const noexcept -> IteratorT const&
639 {
640 return end_;
641 }
642
643 auto size() const noexcept -> std::size_t
644 {
645 return std::distance(begin_, end_);
646 }
647 };
648
649 template<typename IteratorT>
650 auto make_view(IteratorT const& cbegin, IteratorT const& cend) -> ContainerView<IteratorT> const
651 {
652 return ContainerView<IteratorT>{ cbegin, cend };
653 }
654
655} // namespace anonymous
656
666template <typename ResultT = statistics_result_type,
667 typename IteratorT,
668 typename ElementT = std::decay_t<decltype(*std::declval<IteratorT>())>,
669 typename Accessor = std::result_of_t<decltype(ElementAccessor<ElementT>())(ElementT)>(*)(ElementT const&),
670 typename DataT = std::decay_t<std::result_of_t<Accessor(ElementT)>>>
671auto mean(IteratorT const& begin, IteratorT const& end,
672 Accessor accessor = ElementAccessor<ElementT>()) -> ResultT
673{
674 return mean<ResultT>(make_view(begin, end), accessor);
675}
676
686template <typename ResultT = statistics_result_type,
687 typename IteratorT,
688 typename ElementT = std::decay_t<decltype(*std::declval<IteratorT>())>,
689 typename Accessor = std::result_of_t<decltype(ElementAccessor<ElementT>())(ElementT)>(*)(ElementT const&),
690 typename DataT = std::decay_t<std::result_of_t<Accessor(ElementT)>>>
691auto rms(IteratorT const& begin, IteratorT const& end,
692 Accessor accessor = ElementAccessor<ElementT>()) -> ResultT
693{
694 return rms<ResultT>(make_view(begin, end), accessor);
695}
696
706template <typename ResultT = statistics_result_type,
707 typename IteratorT,
708 typename ElementT = std::decay_t<decltype(*std::declval<IteratorT>())>,
709 typename Accessor = std::result_of_t<decltype(ElementAccessor<ElementT>())(ElementT)>(*)(ElementT const&),
710 typename DataT = std::decay_t<std::result_of_t<Accessor(ElementT)>>>
711auto median(IteratorT const& begin, IteratorT const& end,
712 Accessor accessor = ElementAccessor<ElementT>()) -> ResultT
713{
714 return median<ResultT>(make_view(begin, end), accessor);
715}
716
726template <typename IteratorT,
727 typename ElementT = std::decay_t<decltype(*std::declval<IteratorT>())>,
728 typename Accessor = std::result_of_t<decltype(ElementAccessor<ElementT>())(ElementT)>(*)(ElementT const&),
729 typename DataT = std::decay_t<std::result_of_t<Accessor(ElementT)>>>
730 auto maximum(IteratorT const& begin, IteratorT const& end,
731 Accessor accessor = ElementAccessor<ElementT>()) -> DataT
732{
733 return maximum(make_view(begin, end), accessor);
734}
735
745template <typename IteratorT,
746 typename ElementT = std::decay_t<decltype(*std::declval<IteratorT>())>,
747 typename Accessor = std::result_of_t<decltype(ElementAccessor<ElementT>())(ElementT)>(*)(ElementT const&),
748 typename DataT = std::decay_t<std::result_of_t<Accessor(ElementT)>>>
749 auto minimum(IteratorT const& begin, IteratorT const& end,
750 Accessor accessor = ElementAccessor<ElementT>()) -> DataT
751{
752 return minimum(make_view(begin, end), accessor);
753}
754
764template <typename IteratorT,
765 typename ElementT = std::decay_t<decltype(*std::declval<IteratorT>())>,
766 typename Accessor = std::result_of_t<decltype(ElementAccessor<ElementT>())(ElementT)>(*)(ElementT const&),
767 typename DataT = std::decay_t<std::result_of_t<Accessor(ElementT)>>>
768auto min_max(IteratorT const& begin, IteratorT const& end,
769 Accessor accessor = ElementAccessor<ElementT>()) -> MinMax<DataT>
770{
771 return min_max(make_view(begin, end), accessor);
772}
773
785template <typename IteratorT,
786 typename ElementT = std::decay_t<decltype(*std::declval<IteratorT>())>,
787 typename Accessor = std::result_of_t<decltype(ElementAccessor<ElementT>())(ElementT)>(*)(ElementT const&),
788 typename DataT = std::decay_t<std::result_of_t<Accessor(ElementT)>>>
789auto remove_outliers(IteratorT const& begin, IteratorT const& end,
790 std::size_t outliers, Accessor accessor = ElementAccessor<ElementT>()) -> std::vector<ElementT>
791{
792 return remove_outliers(make_view(begin, end), outliers, accessor);
793}
794
805template <typename ResultT = statistics_result_type,
806 typename IteratorT,
807 typename ElementT = std::decay_t<decltype(*std::declval<IteratorT>())>,
808 typename Accessor = std::result_of_t<decltype(ElementAccessor<ElementT>())(ElementT)>(*)(ElementT const&),
809 typename DataT = std::decay_t<std::result_of_t<Accessor(ElementT)>>>
810auto standard_deviation(IteratorT const& begin, IteratorT const& end,
811 Accessor accessor = ElementAccessor<ElementT>()) -> StandardDeviationMean<ResultT>
812{
813 return standard_deviation<ResultT>(make_view(begin, end), accessor);
814}
815
827template <typename ResultT = statistics_result_type,
828 typename IteratorT,
829 typename ElementT = std::decay_t<decltype(*std::declval<IteratorT>())>,
830 typename Accessor = std::result_of_t<decltype(ElementAccessor<ElementT>())(ElementT)>(*)(ElementT const&),
831 typename DataT = std::decay_t<std::result_of_t<Accessor(ElementT)>>,
832 typename OpClosure>
833auto accumulate(IteratorT const& begin, IteratorT const& end, OpClosure op,
834 Accessor accessor = ElementAccessor<ElementT>()) -> ResultT
835{
836 return accumulate<ResultT>(make_view(begin, end), op, accessor);
837}
838
839} // namespace gul17
840
841#endif
842
843// vi:ts=4:sw=4:sts=4:et
A struct holding a standard deviation and a mean value.
Definition statistics.h:105
auto constexpr bit_set(unsigned bit) noexcept -> ReturnT
Set a bit in an integral type.
Definition bit_manip.h:121
auto min_max(ContainerT const &container, Accessor accessor=ElementAccessor< ElementT >()) -> MinMax< DataT >
Find the minimum and maximum element values in a container.
Definition statistics.h:427
auto minimum(ContainerT const &container, Accessor accessor=ElementAccessor< ElementT >()) -> DataT
Return the minimum element value in a container.
Definition statistics.h:368
DataT sigma_
The standard deviation (sigma) value.
Definition statistics.h:107
auto rms(ContainerT const &container, Accessor accessor=ElementAccessor< ElementT >()) -> ResultT
Calculate the root mean square of all elements in a container.
Definition statistics.h:205
double statistics_result_type
Type used to return statistic properties.
Definition statistics.h:44
DataT max
Maximum value.
Definition statistics.h:79
auto median(ContainerT const &container, Accessor accessor=ElementAccessor< ElementT >()) -> ResultT
Find the median of all elements in a container.
Definition statistics.h:246
auto remove_outliers(ContainerT &&cont, std::size_t outliers, Accessor accessor=ElementAccessor< ElementT >()) -> ContainerT &
Remove elements that are far away from other elements.
Definition statistics.h:475
auto accumulate(ContainerT const &container, OpClosure op, Accessor accessor=ElementAccessor< ElementT >()) -> ResultT
Calculate some aggregate value from all elements of a container.
Definition statistics.h:606
auto standard_deviation(ContainerT const &container, Accessor accessor=ElementAccessor< ElementT >()) -> StandardDeviationMean< ResultT >
Calculate the standard deviation of all elements in a container.
Definition statistics.h:552
auto sigma() const noexcept -> DataT
Get the standard deviation value.
Definition statistics.h:126
DataT mean_
The mean value.
Definition statistics.h:108
auto mean(ContainerT const &container, Accessor accessor=ElementAccessor< ElementT >()) -> ResultT
Calculate the arithmetic mean value of all elements in a container.
Definition statistics.h:167
auto ElementAccessor()
Return a mock element accessor for containers.
Definition statistics.h:59
auto mean() const noexcept -> DataT
Get the arithmetic mean value.
Definition statistics.h:130
auto maximum(ContainerT const &container, Accessor accessor=ElementAccessor< ElementT >()) -> DataT
Return the maximum element value in a container.
Definition statistics.h:314
DataT min
Minimum value.
Definition statistics.h:78
Definition of macros used internally by GUL.
Namespace gul17 contains all functions and classes of the General Utility Library.
Definition doxygen.h:26
Object that is designed to holds two values: minimum and maximum of something.
Definition statistics.h:77
Some metaprogramming traits for the General Utility Library.