Medial Code Documentation
Loading...
Searching...
No Matches
stats.h
1
4#ifndef XGBOOST_COMMON_STATS_H_
5#define XGBOOST_COMMON_STATS_H_
6#include <algorithm>
7#include <iterator> // for distance
8#include <limits>
9#include <vector>
10
11#include "algorithm.h" // for StableSort
12#include "common.h" // AssertGPUSupport, OptionalWeights
13#include "optional_weight.h" // OptionalWeights
14#include "transform_iterator.h" // MakeIndexTransformIter
15#include "xgboost/context.h" // Context
16#include "xgboost/linalg.h" // TensorView,VectorView
17#include "xgboost/logging.h" // CHECK_GE
18
19namespace xgboost {
20namespace common {
21
33template <typename Iter>
34float Quantile(Context const* ctx, double alpha, Iter const& begin, Iter const& end) {
35 CHECK(alpha >= 0 && alpha <= 1);
36 auto n = static_cast<double>(std::distance(begin, end));
37 if (n == 0) {
38 return std::numeric_limits<float>::quiet_NaN();
39 }
40
41 std::vector<std::size_t> sorted_idx(n);
42 std::iota(sorted_idx.begin(), sorted_idx.end(), 0);
43 if (omp_in_parallel()) {
44 std::stable_sort(sorted_idx.begin(), sorted_idx.end(),
45 [&](std::size_t l, std::size_t r) { return *(begin + l) < *(begin + r); });
46 } else {
47 StableSort(ctx, sorted_idx.begin(), sorted_idx.end(),
48 [&](std::size_t l, std::size_t r) { return *(begin + l) < *(begin + r); });
49 }
50
51 auto val = [&](size_t i) { return *(begin + sorted_idx[i]); };
52 static_assert(std::is_same<decltype(val(0)), float>::value);
53
54 if (alpha <= (1 / (n + 1))) {
55 return val(0);
56 }
57 if (alpha >= (n / (n + 1))) {
58 return val(sorted_idx.size() - 1);
59 }
60
61 double x = alpha * static_cast<double>((n + 1));
62 double k = std::floor(x) - 1;
63 CHECK_GE(k, 0);
64 double d = (x - 1) - k;
65
66 auto v0 = val(static_cast<size_t>(k));
67 auto v1 = val(static_cast<size_t>(k) + 1);
68 return v0 + d * (v1 - v0);
69}
70
78template <typename Iter, typename WeightIter>
79float WeightedQuantile(Context const* ctx, double alpha, Iter begin, Iter end, WeightIter w_begin) {
80 auto n = static_cast<double>(std::distance(begin, end));
81 if (n == 0) {
82 return std::numeric_limits<float>::quiet_NaN();
83 }
84 std::vector<size_t> sorted_idx(n);
85 std::iota(sorted_idx.begin(), sorted_idx.end(), 0);
86 if (omp_in_parallel()) {
87 std::stable_sort(sorted_idx.begin(), sorted_idx.end(),
88 [&](std::size_t l, std::size_t r) { return *(begin + l) < *(begin + r); });
89 } else {
90 StableSort(ctx, sorted_idx.begin(), sorted_idx.end(),
91 [&](std::size_t l, std::size_t r) { return *(begin + l) < *(begin + r); });
92 }
93
94 auto val = [&](size_t i) { return *(begin + sorted_idx[i]); };
95
96 std::vector<float> weight_cdf(n); // S_n
97 // weighted cdf is sorted during construction
98 weight_cdf[0] = *(w_begin + sorted_idx[0]);
99 for (size_t i = 1; i < n; ++i) {
100 weight_cdf[i] = weight_cdf[i - 1] + w_begin[sorted_idx[i]];
101 }
102 float thresh = weight_cdf.back() * alpha;
103 std::size_t idx =
104 std::lower_bound(weight_cdf.cbegin(), weight_cdf.cend(), thresh) - weight_cdf.cbegin();
105 idx = std::min(idx, static_cast<size_t>(n - 1));
106 return val(idx);
107}
108
109namespace cuda_impl {
110void Median(Context const* ctx, linalg::TensorView<float const, 2> t, OptionalWeights weights,
112
114
115#if !defined(XGBOOST_USE_CUDA)
116inline void Median(Context const*, linalg::TensorView<float const, 2>, OptionalWeights,
118 common::AssertGPUSupport();
119}
120inline void Mean(Context const*, linalg::VectorView<float const>, linalg::VectorView<float>) {
121 common::AssertGPUSupport();
122}
123#endif // !defined(XGBOOST_USE_CUDA)
124} // namespace cuda_impl
125
129void Median(Context const* ctx, linalg::Tensor<float, 2> const& t,
130 HostDeviceVector<float> const& weights, linalg::Tensor<float, 1>* out);
131
132void Mean(Context const* ctx, linalg::Vector<float> const& v, linalg::Vector<float>* out);
133} // namespace common
134} // namespace xgboost
135#endif // XGBOOST_COMMON_STATS_H_
A tensor view with static type and dimension.
Definition linalg.h:293
A tensor storage.
Definition linalg.h:742
Copyright 2014-2023, XGBoost Contributors.
defines console logging options for xgboost. Use to enforce unified print behavior.
Copyright 2021-2023 by XGBoost Contributors.
float WeightedQuantile(Context const *ctx, double alpha, Iter begin, Iter end, WeightIter w_begin)
Calculate the weighted quantile with step function.
Definition stats.h:79
float Quantile(Context const *ctx, double alpha, Iter const &begin, Iter const &end)
Quantile using linear interpolation.
Definition stats.h:34
void Median(Context const *ctx, linalg::Tensor< float, 2 > const &t, HostDeviceVector< float > const &weights, linalg::Tensor< float, 1 > *out)
Calculate medians for each column of the input matrix.
Definition stats.cc:20
namespace of xgboost
Definition base.h:90
Runtime context for XGBoost.
Definition context.h:84
Copyright 2015-2023 by XGBoost Contributors.