General Utility Library for C++17 26.5.0
statistics.h
Go to the documentation of this file.
1
26#ifndef GUL17_STATISTICS_H_
27#define GUL17_STATISTICS_H_
28
29#include <algorithm>
30#include <cmath>
31#include <limits>
32#include <numeric>
33#include <type_traits>
34#include <vector>
35
36#include "gul17/internal.h"
37#include "gul17/traits.h"
38
39namespace gul17 {
40
48
/**
 * Return a mock element accessor for containers.
 *
 * The returned closure is an identity accessor: it takes an element by const
 * reference and returns a const reference to that same element. It captures
 * nothing, so it converts to a plain function pointer of type
 * `ElementT const& (*)(ElementT const&)`.
 *
 * \tparam ElementT  Type of the elements the accessor is applied to
 *
 * \returns a captureless lambda that returns its argument unchanged.
 */
template <typename ElementT>
auto ElementAccessor()
{
    return [](ElementT const& el) -> ElementT const&
        { return el; };
}
67
/**
 * Object that is designed to hold two values: minimum and maximum of something.
 *
 * The members start out "inverted" (min at the largest representable value,
 * max at the lowest one), so that the first real value compared against them
 * replaces both.
 *
 * \tparam DataT  Type of the stored values
 *
 * The unnamed second template parameter only exists to select the
 * floating-point specialization below; it must stay `void` for the
 * specialization to match.
 */
template <typename DataT, typename = void>
struct MinMax {
    DataT min{ std::numeric_limits<DataT>::max() };    ///< Minimum value
    DataT max{ std::numeric_limits<DataT>::lowest() }; ///< Maximum value
};

/**
 * Specialization of MinMax for floating-point types.
 *
 * Both members start out as NaN, which marks "no value seen yet".
 *
 * \tparam DataT  A floating-point type
 */
template <typename DataT>
struct MinMax<DataT, std::enable_if_t<std::is_floating_point<DataT>::value>> {
    DataT min{ std::numeric_limits<DataT>::quiet_NaN() }; ///< Minimum value
    DataT max{ std::numeric_limits<DataT>::quiet_NaN() }; ///< Maximum value
};
90
// Visible remainder of the StandardDeviationMean class template (a struct holding a
// standard deviation and a mean value).
// NOTE(review): this listing is incomplete. The class head, the declarations of the
// members sigma_ and mean_, and the signature lines of the two getters below are
// missing here; restore them from the original header before compiling.
109public:
112
// Implicit conversion to DataT: yields the standard deviation value (sigma_).
125 operator DataT() const noexcept {
126 return sigma_;
127 }
// Body of a getter returning the standard deviation value; presumably sigma(),
// whose signature line is missing from this listing -- TODO confirm.
130 return sigma_;
131 }
// Body of a getter returning the mean value; presumably mean(), whose signature
// line is missing from this listing -- TODO confirm.
134 return mean_;
135 }
136};
137
139
163template <typename ResultT = statistics_result_type,
164 typename ContainerT,
165 typename ElementT = typename ContainerT::value_type,
166 typename Accessor = std::invoke_result_t<decltype(ElementAccessor<ElementT>()), ElementT>(*)(ElementT const&),
167 typename DataT = typename std::decay_t<std::invoke_result_t<Accessor, ElementT>>,
168 typename = std::enable_if_t<IsContainerLike<ContainerT>::value>
169 >
171{
172 auto const sum = std::accumulate(
173 container.cbegin(), container.cend(),
174 ResultT{ },
175 [accessor] (ResultT const& accu, ElementT const& el) {
176 return accu + static_cast<ResultT>(accessor(el)); } );
177 return sum / static_cast<ResultT>(container.size());
178}
179
201template <typename ResultT = statistics_result_type,
202 typename ContainerT,
203 typename ElementT = typename ContainerT::value_type,
204 typename Accessor = std::invoke_result_t<decltype(ElementAccessor<ElementT>()), ElementT>(*)(ElementT const&),
205 typename DataT = typename std::decay_t<std::invoke_result_t<Accessor, ElementT>>,
206 typename = std::enable_if_t<IsContainerLike<ContainerT>::value>
207 >
209{
210 auto const sum = std::accumulate(
211 container.cbegin(), container.cend(),
212 ResultT{ },
213 [accessor] (ResultT const& accu, ElementT const& el) {
214 return accu + std::pow(static_cast<ResultT>(accessor(el)), 2); } );
215 return std::sqrt(sum / static_cast<ResultT>(container.size()));
216}
217
242template <typename ResultT = statistics_result_type,
243 typename ContainerT,
244 typename ElementT = typename ContainerT::value_type,
245 typename Accessor = std::invoke_result_t<decltype(ElementAccessor<ElementT>()), ElementT>(*)(ElementT const&),
246 typename DataT = typename std::decay_t<std::invoke_result_t<Accessor, ElementT>>,
247 typename = std::enable_if_t<IsContainerLike<ContainerT>::value>
248 >
250{
251 auto const len = container.size();
252 if (len == 0)
253 return std::numeric_limits<ResultT>::quiet_NaN();
254
255 // work with a copy of the data
256 // because nth_element() partially sorts the input data
257 auto data_copy = std::vector<DataT>{ };
258 data_copy.resize(len);
259 auto data_copy_it = data_copy.begin();
260 auto data_it = container.cbegin();
261 auto const data_end = container.cend();
262 for (; data_it != data_end; ++data_it) {
264 }
265
266 // What is the middle element?
267 auto middle = data_copy.begin() + (len / 2);
268 std::nth_element(data_copy.begin(), middle, data_copy.end());
269 auto median = static_cast<ResultT>(*middle);
270
271 // If we have an even number of elements we need to do more:
272 // We calculate the mean value of the two 'middle' elements
273 if (0 == len % 2) {
274 std::nth_element(data_copy.begin(), middle - 1, data_copy.end());
275 median = (median / 2) + (static_cast<ResultT>(*(middle - 1)) / 2);
276 }
277
278 return median;
279}
280
311template <typename ContainerT,
312 typename ElementT = typename ContainerT::value_type,
313 typename Accessor = std::invoke_result_t<decltype(ElementAccessor<ElementT>()), ElementT>(*)(ElementT const&),
314 typename DataT = typename std::decay_t<std::invoke_result_t<Accessor, ElementT>>,
315 typename = std::enable_if_t<IsContainerLike<ContainerT>::value>
316>
318{
319 constexpr auto initial_value = std::numeric_limits<DataT>::has_quiet_NaN ?
320 std::numeric_limits<DataT>::quiet_NaN() : std::numeric_limits<DataT>::lowest();
321
322 return std::accumulate(
323 container.cbegin(), container.cend(), initial_value,
324 [&accessor](DataT const& accu, ElementT const& el) -> DataT {
325 auto const val = accessor(el);
326 // Test portably for not-NAN (some compilers do not have std::isnan() for
327 // integral types)
328 if (val == val) {
329 if (not (val <= accu)) // inverted logic to handle NAN correctly
330 return val;
331 }
332 return accu; });
333}
334
365template <typename ContainerT,
366 typename ElementT = typename ContainerT::value_type,
367 typename Accessor = std::invoke_result_t<decltype(ElementAccessor<ElementT>()), ElementT>(*)(ElementT const&),
368 typename DataT = typename std::decay_t<std::invoke_result_t<Accessor, ElementT>>,
369 typename = std::enable_if_t<IsContainerLike<ContainerT>::value>
370>
371auto minimum(ContainerT const& container, Accessor accessor = ElementAccessor<ElementT>()) -> DataT
372{
373 constexpr auto initial_value = std::numeric_limits<DataT>::has_quiet_NaN ?
374 std::numeric_limits<DataT>::quiet_NaN() : std::numeric_limits<DataT>::max();
375
376 return std::accumulate(
377 container.cbegin(), container.cend(), initial_value,
378 [&accessor](DataT const& accu, ElementT const& el) -> DataT {
379 auto const val = accessor(el);
380 // Test portably for not-NAN (some compilers do not have std::isnan() for
381 // integral types)
382 if (val == val) {
383 if (not (val >= accu)) // inverted logic to handle NAN correctly
384 return val;
385 }
386 return accu; });
387}
388
424template <typename ContainerT,
425 typename ElementT = typename ContainerT::value_type,
426 typename Accessor = std::invoke_result_t<decltype(ElementAccessor<ElementT>()), ElementT>(*)(ElementT const&),
427 typename DataT = typename std::decay_t<std::invoke_result_t<Accessor, ElementT>>,
428 typename = std::enable_if_t<IsContainerLike<ContainerT>::value>
429 >
430auto min_max(ContainerT const& container, Accessor accessor = ElementAccessor<ElementT>()) -> MinMax<DataT>
431{
432 using MinMaxT = MinMax<DataT>;
433 auto const sum = std::accumulate(
434 container.cbegin(), container.cend(),
435 MinMaxT{ },
436 [accessor] (MinMaxT const& accu, ElementT const& el) -> MinMaxT {
437 auto out{ accu };
438 auto const val = accessor(el);
439 // Test portably for not-NAN (some compilers do not have std::isnan() for
440 // integral types)
441 if (val == val) {
442 // (a >= NAN) and (a <= NAN) always false for all a
443 if (not (val >= out.min))
444 out.min = val;
445 if (not (val <= out.max))
446 out.max = val;
447 }
448 return out; });
449 return sum;
450}
451
472template <typename ContainerT,
473 typename ElementT = typename ContainerT::value_type,
474 typename Accessor = std::invoke_result_t<decltype(ElementAccessor<ElementT>()), ElementT>(*)(ElementT const&),
475 typename DataT = typename std::decay_t<std::invoke_result_t<Accessor, ElementT>>,
476 typename = std::enable_if_t<IsContainerLike<ContainerT>::value>
477 >
478auto remove_outliers(ContainerT&& cont, std::size_t outliers,
479 Accessor accessor = ElementAccessor<ElementT>()) -> ContainerT&
480{
481 while (outliers-- > 0 and cont.size() > 0) {
482 auto max_distant = std::max_element(cont.begin(), cont.end(),
483 [mean = mean(cont, accessor), accessor] (ElementT const& a, ElementT const& b)
484 { return std::abs(accessor(a) - mean) < std::abs(accessor(b) - mean); });
485 cont.erase(max_distant);
486 }
487 return cont;
488}
489
496template <typename ContainerT,
497 typename ElementT = typename ContainerT::value_type,
498 typename Accessor = std::invoke_result_t<decltype(ElementAccessor<ElementT>()), ElementT>(*)(ElementT const&),
499 typename DataT = typename std::decay_t<std::invoke_result_t<Accessor, ElementT>>,
500 typename = std::enable_if_t<IsContainerLike<ContainerT>::value>
501 >
502auto remove_outliers(ContainerT const& cont, std::size_t outliers,
503 Accessor accessor = ElementAccessor<ElementT>()) -> std::vector<ElementT>
504{
505 auto c = std::vector<ElementT>(cont.size());
506 std::copy(cont.cbegin(), cont.cend(), c.begin());
507 return remove_outliers(std::move(c), outliers, accessor);
508}
509
548template <typename ResultT = statistics_result_type,
549 typename ContainerT,
550 typename ElementT = typename ContainerT::value_type,
551 typename Accessor = std::invoke_result_t<decltype(ElementAccessor<ElementT>()), ElementT>(*)(ElementT const&),
552 typename DataT = typename std::decay_t<std::invoke_result_t<Accessor, ElementT>>,
553 typename = std::enable_if_t<IsContainerLike<ContainerT>::value>
554 >
555auto standard_deviation(ContainerT const& container, Accessor accessor = ElementAccessor<ElementT>()) -> StandardDeviationMean<ResultT>
556{
557 auto const len = container.size();
558
559 if (len == 0)
560 return { };
561
562 auto mean_val = mean<ResultT>(container, accessor);
563
564 if (len == 1)
565 return { std::numeric_limits<ResultT>::quiet_NaN(), mean_val };
566
567 auto sum = std::accumulate(container.cbegin(), container.cend(),
568 ResultT{ },
569 [mean_val, accessor] (ResultT const& accu, ElementT const& el)
570 { return accu + std::pow(static_cast<ResultT>(accessor(el)) - mean_val, 2); });
571
572 sum /= static_cast<ResultT>(container.size() - 1);
573
574 return { std::sqrt(sum), mean_val };
575}
576
601template <typename ResultT = statistics_result_type,
602 typename ContainerT,
603 typename ElementT = typename ContainerT::value_type,
604 typename Accessor = std::invoke_result_t<decltype(ElementAccessor<ElementT>()), ElementT>(*)(ElementT const&),
605 typename DataT = typename std::decay_t<std::invoke_result_t<Accessor, ElementT>>,
606 typename OpClosure,
607 typename = std::enable_if_t<IsContainerLike<ContainerT>::value>
608 >
609auto accumulate(ContainerT const& container, OpClosure op, Accessor accessor = ElementAccessor<ElementT>()) -> ResultT
610{
611 auto const sum = std::accumulate(
612 container.cbegin(), container.cend(),
613 ResultT{ },
614 [accessor, op] (ResultT const& accu, ElementT const& el) {
615 return op(accu, accessor(el)); } );
616 return sum;
617}
618
namespace {

// The following stuff is only there to have a two iterator interface

    /**
     * Minimal container-like view over an iterator pair.
     *
     * It offers just the members that the statistics functions of this file
     * use: value_type, cbegin(), cend() and size().
     *
     * The view stores copies of both iterators, so it stays valid even if it
     * was created from temporary iterators. It does not own the elements: the
     * underlying range must outlive the view.
     *
     * \tparam IteratorT  Type of the iterators delimiting the range
     */
    template <typename IteratorT>
    struct ContainerView {
        IteratorT begin_; ///< Iterator to the first element of the range
        IteratorT end_;   ///< Iterator past the last element of the range
        using value_type = std::decay_t<decltype(*begin_)>;

        ContainerView(IteratorT const& i1, IteratorT const& i2)
            : begin_(i1)
            , end_(i2)
        {
        }

        // Just implement the member functions that we use here

        /// Return the iterator to the first element of the range.
        auto cbegin() const noexcept -> IteratorT const&
        {
            return begin_;
        }

        /// Return the iterator past the last element of the range.
        auto cend() const noexcept -> IteratorT const&
        {
            return end_;
        }

        /// Return the number of elements between begin and end (via std::distance()).
        auto size() const noexcept -> std::size_t
        {
            return static_cast<std::size_t>(std::distance(begin_, end_));
        }
    };

    /**
     * Create a ContainerView from two iterators, deducing the iterator type.
     *
     * \param cbegin  Iterator to the first element of the range
     * \param cend    Iterator past the last element of the range
     *
     * \returns a view over the range [cbegin, cend).
     */
    template<typename IteratorT>
    auto make_view(IteratorT const& cbegin, IteratorT const& cend) -> ContainerView<IteratorT> const
    {
        return ContainerView<IteratorT>{ cbegin, cend };
    }

} // namespace anonymous
659
669template <typename ResultT = statistics_result_type,
670 typename IteratorT,
671 typename ElementT = std::decay_t<decltype(*std::declval<IteratorT>())>,
672 typename Accessor = std::invoke_result_t<decltype(ElementAccessor<ElementT>()), ElementT>(*)(ElementT const&),
673 typename DataT = std::decay_t<std::invoke_result_t<Accessor, ElementT>>>
674auto mean(IteratorT const& begin, IteratorT const& end,
675 Accessor accessor = ElementAccessor<ElementT>()) -> ResultT
676{
677 return mean<ResultT>(make_view(begin, end), accessor);
678}
679
689template <typename ResultT = statistics_result_type,
690 typename IteratorT,
691 typename ElementT = std::decay_t<decltype(*std::declval<IteratorT>())>,
692 typename Accessor = std::invoke_result_t<decltype(ElementAccessor<ElementT>()), ElementT>(*)(ElementT const&),
693 typename DataT = std::decay_t<std::invoke_result_t<Accessor, ElementT>>>
694auto rms(IteratorT const& begin, IteratorT const& end,
695 Accessor accessor = ElementAccessor<ElementT>()) -> ResultT
696{
697 return rms<ResultT>(make_view(begin, end), accessor);
698}
699
709template <typename ResultT = statistics_result_type,
710 typename IteratorT,
711 typename ElementT = std::decay_t<decltype(*std::declval<IteratorT>())>,
712 typename Accessor = std::invoke_result_t<decltype(ElementAccessor<ElementT>()), ElementT>(*)(ElementT const&),
713 typename DataT = std::decay_t<std::invoke_result_t<Accessor, ElementT>>>
714auto median(IteratorT const& begin, IteratorT const& end,
715 Accessor accessor = ElementAccessor<ElementT>()) -> ResultT
716{
717 return median<ResultT>(make_view(begin, end), accessor);
718}
719
729template <typename IteratorT,
730 typename ElementT = std::decay_t<decltype(*std::declval<IteratorT>())>,
731 typename Accessor = std::invoke_result_t<decltype(ElementAccessor<ElementT>()), ElementT>(*)(ElementT const&),
732 typename DataT = std::decay_t<std::invoke_result_t<Accessor, ElementT>>>
733 auto maximum(IteratorT const& begin, IteratorT const& end,
734 Accessor accessor = ElementAccessor<ElementT>()) -> DataT
735{
736 return maximum(make_view(begin, end), accessor);
737}
738
748template <typename IteratorT,
749 typename ElementT = std::decay_t<decltype(*std::declval<IteratorT>())>,
750 typename Accessor = std::invoke_result_t<decltype(ElementAccessor<ElementT>()), ElementT>(*)(ElementT const&),
751 typename DataT = std::decay_t<std::invoke_result_t<Accessor, ElementT>>>
752 auto minimum(IteratorT const& begin, IteratorT const& end,
753 Accessor accessor = ElementAccessor<ElementT>()) -> DataT
754{
755 return minimum(make_view(begin, end), accessor);
756}
757
767template <typename IteratorT,
768 typename ElementT = std::decay_t<decltype(*std::declval<IteratorT>())>,
769 typename Accessor = std::invoke_result_t<decltype(ElementAccessor<ElementT>()), ElementT>(*)(ElementT const&),
770 typename DataT = std::decay_t<std::invoke_result_t<Accessor, ElementT>>>
771auto min_max(IteratorT const& begin, IteratorT const& end,
772 Accessor accessor = ElementAccessor<ElementT>()) -> MinMax<DataT>
773{
774 return min_max(make_view(begin, end), accessor);
775}
776
788template <typename IteratorT,
789 typename ElementT = std::decay_t<decltype(*std::declval<IteratorT>())>,
790 typename Accessor = std::invoke_result_t<decltype(ElementAccessor<ElementT>()), ElementT>(*)(ElementT const&),
791 typename DataT = std::decay_t<std::invoke_result_t<Accessor, ElementT>>>
792auto remove_outliers(IteratorT const& begin, IteratorT const& end,
793 std::size_t outliers, Accessor accessor = ElementAccessor<ElementT>()) -> std::vector<ElementT>
794{
795 return remove_outliers(make_view(begin, end), outliers, accessor);
796}
797
808template <typename ResultT = statistics_result_type,
809 typename IteratorT,
810 typename ElementT = std::decay_t<decltype(*std::declval<IteratorT>())>,
811 typename Accessor = std::invoke_result_t<decltype(ElementAccessor<ElementT>()), ElementT>(*)(ElementT const&),
812 typename DataT = std::decay_t<std::invoke_result_t<Accessor, ElementT>>>
813auto standard_deviation(IteratorT const& begin, IteratorT const& end,
814 Accessor accessor = ElementAccessor<ElementT>()) -> StandardDeviationMean<ResultT>
815{
816 return standard_deviation<ResultT>(make_view(begin, end), accessor);
817}
818
830template <typename ResultT = statistics_result_type,
831 typename IteratorT,
832 typename ElementT = std::decay_t<decltype(*std::declval<IteratorT>())>,
833 typename Accessor = std::invoke_result_t<decltype(ElementAccessor<ElementT>()), ElementT>(*)(ElementT const&),
834 typename DataT = std::decay_t<std::invoke_result_t<Accessor, ElementT>>,
835 typename OpClosure>
836auto accumulate(IteratorT const& begin, IteratorT const& end, OpClosure op,
837 Accessor accessor = ElementAccessor<ElementT>()) -> ResultT
838{
839 return accumulate<ResultT>(make_view(begin, end), op, accessor);
840}
841
842} // namespace gul17
843
844#endif
845
846// vi:ts=4:sw=4:sts=4:et
A struct holding a standard deviation and a mean value.
Definition statistics.h:108
auto constexpr bit_set(unsigned bit) noexcept -> ReturnT
Set a bit in an integral type.
Definition bit_manip.h:124
auto min_max(ContainerT const &container, Accessor accessor=ElementAccessor< ElementT >()) -> MinMax< DataT >
Find the minimum and maximum element values in a container.
Definition statistics.h:430
auto minimum(ContainerT const &container, Accessor accessor=ElementAccessor< ElementT >()) -> DataT
Return the minimum element value in a container.
Definition statistics.h:371
DataT sigma_
The standard deviation (sigma) value.
Definition statistics.h:110
auto rms(ContainerT const &container, Accessor accessor=ElementAccessor< ElementT >()) -> ResultT
Calculate the root mean square of all elements in a container.
Definition statistics.h:208
double statistics_result_type
Type used to return statistic properties.
Definition statistics.h:47
DataT max
Maximum value.
Definition statistics.h:82
auto median(ContainerT const &container, Accessor accessor=ElementAccessor< ElementT >()) -> ResultT
Find the median of all elements in a container.
Definition statistics.h:249
auto remove_outliers(ContainerT &&cont, std::size_t outliers, Accessor accessor=ElementAccessor< ElementT >()) -> ContainerT &
Remove elements that are far away from other elements.
Definition statistics.h:478
auto accumulate(ContainerT const &container, OpClosure op, Accessor accessor=ElementAccessor< ElementT >()) -> ResultT
Calculate some aggregate value from all elements of a container.
Definition statistics.h:609
auto standard_deviation(ContainerT const &container, Accessor accessor=ElementAccessor< ElementT >()) -> StandardDeviationMean< ResultT >
Calculate the standard deviation of all elements in a container.
Definition statistics.h:555
auto sigma() const noexcept -> DataT
Get the standard deviation value.
Definition statistics.h:129
DataT mean_
The mean value.
Definition statistics.h:111
auto mean(ContainerT const &container, Accessor accessor=ElementAccessor< ElementT >()) -> ResultT
Calculate the arithmetic mean value of all elements in a container.
Definition statistics.h:170
auto ElementAccessor()
Return a mock element accessor for containers.
Definition statistics.h:62
auto mean() const noexcept -> DataT
Get the arithmetic mean value.
Definition statistics.h:133
auto maximum(ContainerT const &container, Accessor accessor=ElementAccessor< ElementT >()) -> DataT
Return the maximum element value in a container.
Definition statistics.h:317
DataT min
Minimum value.
Definition statistics.h:81
Definition of macros used internally by GUL.
Namespace gul17 contains all functions and classes of the General Utility Library.
Definition doxygen.h:29
Object that is designed to hold two values: minimum and maximum of something.
Definition statistics.h:80
Some metaprogramming traits for the General Utility Library.