4#ifndef XGBOOST_COMMON_NUMERIC_H_
5#define XGBOOST_COMMON_NUMERIC_H_
17#include "threading_utils.h"
26template <
typename Iter,
typename Idx>
29 out = std::vector<Idx>{0};
30 size_t n = std::distance(begin, end);
31 for (
size_t i = 1; i < n; ++i) {
32 if (begin[i] != begin[i - 1]) {
36 if (out.back() != n) {
45template <
typename InIt,
typename OutIt,
typename T>
46void PartialSum(int32_t n_threads, InIt begin, InIt end, T init, OutIt out_it) {
47 static_assert(std::is_same<T, typename std::iterator_traits<InIt>::value_type>::value);
48 static_assert(std::is_same<T, typename std::iterator_traits<OutIt>::value_type>::value);
51 auto n =
static_cast<size_t>(std::distance(begin, end));
52 const size_t batch_threads =
53 std::max(
static_cast<size_t>(1), std::min(n,
static_cast<size_t>(n_threads)));
56 size_t block_size = n / batch_threads;
59#pragma omp parallel num_threads(batch_threads)
62 for (
omp_ulong tid = 0; tid < batch_threads; ++tid) {
64 size_t ibegin = block_size * tid;
65 size_t iend = (tid == (batch_threads - 1) ? n : (block_size * (tid + 1)));
68 for (
size_t ridx = ibegin; ridx < iend; ++ridx) {
69 running_sum += *(begin + ridx);
70 *(out_it + 1 + ridx) = running_sum;
78 partial_sums[0] = init;
79 for (
size_t i = 1; i < batch_threads; ++i) {
80 partial_sums[i] = partial_sums[i - 1] + *(out_it + i * block_size);
86 for (
omp_ulong tid = 0; tid < batch_threads; ++tid) {
88 size_t ibegin = block_size * tid;
89 size_t iend = (tid == (batch_threads - 1) ? n : (block_size * (tid + 1)));
91 for (
size_t i = ibegin; i < iend; ++i) {
92 *(out_it + 1 + i) += partial_sums[tid];
102#if !defined(XGBOOST_USE_CUDA)
114template <
typename It,
typename V =
typename It::value_type>
115V
Reduce(
Context const* ctx, It first, It second, V
const& init) {
116 std::size_t n = std::distance(first, second);
117 auto n_threads =
static_cast<std::size_t
>(std::min(n,
static_cast<std::size_t
>(ctx->
Threads())));
119 common::ParallelFor(n, n_threads, [&](
auto i) { result_tloc[omp_get_thread_num()] += first[i]; });
120 auto result = std::accumulate(result_tloc.cbegin(), result_tloc.cbegin() + n_threads, init);
130template <
typename It>
131void Iota(
Context const* ctx, It first, It last,
132 typename std::iterator_traits<It>::value_type
const& value) {
133 auto n = std::distance(first, last);
134 std::int32_t n_threads = ctx->
Threads();
135 const size_t block_size = n / n_threads + !!(n % n_threads);
137#pragma omp parallel num_threads(n_threads)
140 const size_t tid = omp_get_thread_num();
141 const size_t ibegin = tid * block_size;
142 const size_t iend = std::min(ibegin + block_size,
static_cast<size_t>(n));
143 for (
size_t i = ibegin; i < iend; ++i) {
144 first[i] = i + value;
OMP Exception class catches, saves and rethrows exception from OMP blocks.
Definition common.h:53
void Rethrow()
should be called from the main thread to rethrow the exception
Definition common.h:84
void Run(Function f, Parameters... params)
Parallel OMP blocks should be placed within Run to save exception.
Definition common.h:65
Definition host_device_vector.h:87
A C-style array with in-stack allocation. As long as the array is smaller than MaxStackSize,...
Definition threading_utils.h:270
Copyright 2014-2023, XGBoost Contributors.
A device-and-host vector abstraction layer.
Copyright 2017-2023, XGBoost Contributors.
Definition span.h:77
void PartialSum(int32_t n_threads, InIt begin, InIt end, T init, OutIt out_it)
Varient of std::partial_sum, out_it should point to a container that has n + 1 elements.
Definition numeric.h:46
std::int32_t constexpr DefaultMaxThreads()
Constant that can be used for initializing static thread local memory.
Definition threading_utils.h:310
double Reduce(Context const *ctx, HostDeviceVector< float > const &values)
Reduction on host device vector.
Definition numeric.cc:13
void RunLengthEncode(Iter begin, Iter end, std::vector< Idx > *p_out)
Run length encode on CPU, input must be sorted.
Definition numeric.h:27
dmlc::omp_ulong omp_ulong
define unsigned long for openmp loop
Definition base.h:322
Runtime context for XGBoost.
Definition context.h:84
std::int32_t Threads() const
Returns the automatically chosen number of threads based on the nthread parameter and the system sett...
Definition context.cc:203
defines some common utility function.
Copyright 2015-2023 by XGBoost Contributors.