Medial Code Documentation
numeric.h
#ifndef XGBOOST_COMMON_NUMERIC_H_
#define XGBOOST_COMMON_NUMERIC_H_

#include <dmlc/common.h>  // OMPException

#include <algorithm>  // for std::max
#include <cstddef>    // for size_t
#include <cstdint>    // for int32_t
#include <iterator>   // for iterator_traits
#include <numeric>    // for accumulate
#include <vector>

#include "common.h"                      // AssertGPUSupport
#include "threading_utils.h"             // MemStackAllocator, DefaultMaxThreads
#include "xgboost/context.h"             // Context
#include "xgboost/host_device_vector.h"  // HostDeviceVector

namespace xgboost::common {

/**
 * \brief Run length encode on CPU, input must be sorted.
 */
template <typename Iter, typename Idx>
void RunLengthEncode(Iter begin, Iter end, std::vector<Idx>* p_out) {
  auto& out = *p_out;
  out = std::vector<Idx>{0};
  size_t n = std::distance(begin, end);
  for (size_t i = 1; i < n; ++i) {
    if (begin[i] != begin[i - 1]) {
      out.push_back(i);
    }
  }
  if (out.back() != n) {
    out.push_back(n);
  }
}
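// Usage sketch (illustrative; not part of the original header): RunLengthEncode turns a
// sorted range into CSR-style run boundaries.
//
//   std::vector<int> sorted{1, 1, 2, 2, 2, 5};
//   std::vector<std::size_t> offsets;
//   xgboost::common::RunLengthEncode(sorted.cbegin(), sorted.cend(), &offsets);
//   // offsets == {0, 2, 5, 6}; run i spans [offsets[i], offsets[i + 1]).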

/**
 * \brief Variant of std::partial_sum; out_it should point to a container that has n + 1
 *        elements.
 */
template <typename InIt, typename OutIt, typename T>
void PartialSum(int32_t n_threads, InIt begin, InIt end, T init, OutIt out_it) {
  static_assert(std::is_same<T, typename std::iterator_traits<InIt>::value_type>::value);
  static_assert(std::is_same<T, typename std::iterator_traits<OutIt>::value_type>::value);
  // The number of threads is pegged to the batch size. If the OMP block is parallelized
  // on anything other than the batch/block size, it should be reassigned.
  auto n = static_cast<size_t>(std::distance(begin, end));
  const size_t batch_threads =
      std::max(static_cast<size_t>(1), std::min(n, static_cast<size_t>(n_threads)));
  MemStackAllocator<T, DefaultMaxThreads()> partial_sums(batch_threads);

  size_t block_size = n / batch_threads;

  dmlc::OMPException exc;
#pragma omp parallel num_threads(batch_threads)
  {
#pragma omp for
    for (omp_ulong tid = 0; tid < batch_threads; ++tid) {
      exc.Run([&]() {
        size_t ibegin = block_size * tid;
        size_t iend = (tid == (batch_threads - 1) ? n : (block_size * (tid + 1)));

        // Inclusive prefix sum within this thread's block, written one slot ahead.
        T running_sum = 0;
        for (size_t ridx = ibegin; ridx < iend; ++ridx) {
          running_sum += *(begin + ridx);
          *(out_it + 1 + ridx) = running_sum;
        }
      });
    }

#pragma omp single
    {
      exc.Run([&]() {
        // Per-block offsets: init plus the last local sum of each preceding block.
        partial_sums[0] = init;
        for (size_t i = 1; i < batch_threads; ++i) {
          partial_sums[i] = partial_sums[i - 1] + *(out_it + i * block_size);
        }
      });
    }

#pragma omp for
    for (omp_ulong tid = 0; tid < batch_threads; ++tid) {
      exc.Run([&]() {
        size_t ibegin = block_size * tid;
        size_t iend = (tid == (batch_threads - 1) ? n : (block_size * (tid + 1)));

        // Shift each block's local sums by its block offset to get global sums.
        for (size_t i = ibegin; i < iend; ++i) {
          *(out_it + 1 + i) += partial_sums[tid];
        }
      });
    }
  }
  exc.Rethrow();
}
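// Usage sketch (illustrative; not part of the original header). With init = 0 and an
// output buffer of n + 1 elements, PartialSum builds a CSR-style row pointer. Note that
// the first output slot is never written by PartialSum, so it should be pre-set:
//
//   std::vector<std::size_t> sizes{3, 1, 4};
//   std::vector<std::size_t> indptr(sizes.size() + 1, 0);  // n + 1 elements, indptr[0] == 0
//   xgboost::common::PartialSum(/*n_threads=*/2, sizes.cbegin(), sizes.cend(),
//                               std::size_t{0}, indptr.begin());
//   // indptr == {0, 3, 4, 8}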

namespace cuda_impl {
double Reduce(Context const* ctx, HostDeviceVector<float> const& values);
#if !defined(XGBOOST_USE_CUDA)
// Stub for CPU-only builds: calling the CUDA reduction without GPU support fails loudly.
inline double Reduce(Context const*, HostDeviceVector<float> const&) {
  AssertGPUSupport();
  return 0;
}
#endif  // !defined(XGBOOST_USE_CUDA)
}  // namespace cuda_impl

namespace cpu_impl {
// CPU reduction: one accumulator per thread, then a serial sum over the per-thread
// results. Since `init` seeds every accumulator as well as the final accumulate, it
// should be the additive identity (i.e. 0).
template <typename It, typename V = typename It::value_type>
V Reduce(Context const* ctx, It first, It second, V const& init) {
  std::size_t n = std::distance(first, second);
  auto n_threads = static_cast<std::size_t>(std::min(n, static_cast<std::size_t>(ctx->Threads())));
  common::MemStackAllocator<V, common::DefaultMaxThreads()> result_tloc(n_threads, init);
  common::ParallelFor(n, n_threads, [&](auto i) { result_tloc[omp_get_thread_num()] += first[i]; });
  auto result = std::accumulate(result_tloc.cbegin(), result_tloc.cbegin() + n_threads, init);
  return result;
}
}  // namespace cpu_impl
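// Usage sketch (illustrative; not part of the original header). A default-constructed
// Context is assumed here purely for exposition:
//
//   xgboost::Context ctx;
//   std::vector<double> values{1.0, 2.0, 3.0};
//   double total = xgboost::common::cpu_impl::Reduce(&ctx, values.cbegin(), values.cend(), 0.0);
//   // total == 6.0; pass 0 as `init` since it is added once per thread.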

/**
 * \brief Reduction on host device vector.
 */
double Reduce(Context const* ctx, HostDeviceVector<float> const& values);

template <typename It>
void Iota(Context const* ctx, It first, It last,
          typename std::iterator_traits<It>::value_type const& value) {
  auto n = std::distance(first, last);
  std::int32_t n_threads = ctx->Threads();
  const size_t block_size = n / n_threads + !!(n % n_threads);
  dmlc::OMPException exc;
#pragma omp parallel num_threads(n_threads)
  {
    exc.Run([&]() {
      // Each thread fills one contiguous block with consecutive values starting at
      // `value`, i.e. a parallel counterpart of std::iota.
      const size_t tid = omp_get_thread_num();
      const size_t ibegin = tid * block_size;
      const size_t iend = std::min(ibegin + block_size, static_cast<size_t>(n));
      for (size_t i = ibegin; i < iend; ++i) {
        first[i] = i + value;
      }
    });
  }
}
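// Usage sketch (illustrative; not part of the original header):
//
//   xgboost::Context ctx;
//   std::vector<std::size_t> ridx(4);
//   xgboost::common::Iota(&ctx, ridx.begin(), ridx.end(), std::size_t{1});
//   // ridx == {1, 2, 3, 4}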
}  // namespace xgboost::common

#endif  // XGBOOST_COMMON_NUMERIC_H_
Referenced definitions

dmlc::OMPException: OMP exception class; catches, saves and rethrows exceptions from OMP blocks. Definition common.h:53
dmlc::OMPException::Run(Function f, Parameters... params): parallel OMP blocks should be placed within Run to save the exception. Definition common.h:65
dmlc::OMPException::Rethrow(): should be called from the main thread to rethrow the saved exception. Definition common.h:84
HostDeviceVector: a device-and-host vector abstraction layer. Definition host_device_vector.h:87
MemStackAllocator: a C-style array with in-stack allocation; as long as the array is smaller than MaxStackSize, ... Definition threading_utils.h:270
Span: Definition span.h:77
PartialSum(int32_t n_threads, InIt begin, InIt end, T init, OutIt out_it): variant of std::partial_sum; out_it should point to a container that has n + 1 elements. Definition numeric.h:46
DefaultMaxThreads() (std::int32_t constexpr): constant that can be used for initializing static thread-local memory. Definition threading_utils.h:310
Reduce(Context const* ctx, HostDeviceVector<float> const& values): reduction on a host device vector. Definition numeric.cc:13
RunLengthEncode(Iter begin, Iter end, std::vector<Idx>* p_out): run length encode on CPU; input must be sorted. Definition numeric.h:27
omp_ulong (dmlc::omp_ulong): unsigned long type for OpenMP loops. Definition base.h:322
Context: runtime context for XGBoost. Definition context.h:84
Context::Threads(): returns the automatically chosen number of threads based on the nthread parameter and the system setting. Definition context.cc:203
common.h: defines some common utility functions.