Medial Code Documentation
Loading...
Searching...
No Matches
transform.h
1
4#ifndef XGBOOST_COMMON_TRANSFORM_H_
5#define XGBOOST_COMMON_TRANSFORM_H_
6
7#include <dmlc/common.h>
8#include <dmlc/omp.h>
9#include <xgboost/data.h>
10
11#include <type_traits> // enable_if
12#include <utility>
13#include <vector>
14
15#include "common.h"
16#include "threading_utils.h"
18#include "xgboost/span.h"
19
20#if defined (__CUDACC__)
21#include "device_helpers.cuh"
22#endif // defined (__CUDACC__)
23
24namespace xgboost {
25namespace common {
26
27constexpr size_t kBlockThreads = 256;
28
29namespace detail {
30
31#if defined(__CUDACC__)
32template <typename Functor, typename... SpanType>
33__global__ void LaunchCUDAKernel(Functor _func, Range _range,
34 SpanType... _spans) {
35 for (auto i : dh::GridStrideRange(*_range.begin(), *_range.end())) {
36 _func(i, _spans...);
37 }
38}
39#endif // defined(__CUDACC__)
40
41} // namespace detail
42
57template <bool CompiledWithCuda = WITH_CUDA()>
58class Transform {
59 private:
60 template <typename Functor>
61 struct Evaluator {
62 public:
63 Evaluator(Functor func, Range range, int32_t n_threads, int32_t device_idx)
64 : func_(func), range_{std::move(range)}, n_threads_{n_threads}, device_{device_idx} {}
65
72 template <typename... HDV>
73 void Eval(HDV... vectors) const {
74 bool on_device = device_ >= 0;
75
76 if (on_device) {
77 LaunchCUDA(func_, vectors...);
78 } else {
79 LaunchCPU(func_, vectors...);
80 }
81 }
82
83 private:
84 // CUDA UnpackHDV
85 template <typename T>
86 Span<T> UnpackHDVOnDevice(HostDeviceVector<T>* _vec) const {
87 auto span = _vec->DeviceSpan();
88 return span;
89 }
90 template <typename T>
91 Span<T const> UnpackHDVOnDevice(const HostDeviceVector<T>* _vec) const {
92 auto span = _vec->ConstDeviceSpan();
93 return span;
94 }
95 // CPU UnpackHDV
96 template <typename T>
97 Span<T> UnpackHDV(HostDeviceVector<T>* _vec) const {
98 return Span<T> {_vec->HostPointer(),
99 static_cast<typename Span<T>::index_type>(_vec->Size())};
100 }
101 template <typename T>
102 Span<T const> UnpackHDV(const HostDeviceVector<T>* _vec) const {
103 return Span<T const> {_vec->ConstHostPointer(),
104 static_cast<typename Span<T>::index_type>(_vec->Size())};
105 }
106 // Recursive sync host
107 template <typename T>
108 void SyncHost(const HostDeviceVector<T> *_vector) const {
109 _vector->ConstHostPointer();
110 }
111 template <typename Head, typename... Rest>
112 void SyncHost(const HostDeviceVector<Head> *_vector,
113 const HostDeviceVector<Rest> *... _vectors) const {
114 _vector->ConstHostPointer();
115 SyncHost(_vectors...);
116 }
117 // Recursive unpack for Shard.
118 template <typename T>
119 void UnpackShard(int device, const HostDeviceVector<T> *vector) const {
120 vector->SetDevice(device);
121 }
122 template <typename Head, typename... Rest>
123 void UnpackShard(int device,
124 const HostDeviceVector<Head> *_vector,
125 const HostDeviceVector<Rest> *... _vectors) const {
126 _vector->SetDevice(device);
127 UnpackShard(device, _vectors...);
128 }
129
130#if defined(__CUDACC__)
131 template <typename std::enable_if<CompiledWithCuda>::type* = nullptr,
132 typename... HDV>
133 void LaunchCUDA(Functor _func, HDV*... _vectors) const {
134 UnpackShard(device_, _vectors...);
135
136 size_t range_size = *range_.end() - *range_.begin();
137
138 // Extract index to deal with possible old OpenMP.
139 // This deals with situation like multi-class setting where
140 // granularity is used in data vector.
141 size_t shard_size = range_size;
142 Range shard_range {0, static_cast<Range::DifferenceType>(shard_size)};
143 dh::safe_cuda(cudaSetDevice(device_));
144 const int kGrids =
145 static_cast<int>(DivRoundUp(*(range_.end()), kBlockThreads));
146 if (kGrids == 0) {
147 return;
148 }
149 detail::LaunchCUDAKernel<<<kGrids, kBlockThreads>>>( // NOLINT
150 _func, shard_range, UnpackHDVOnDevice(_vectors)...);
151 }
152#else
154 template <typename std::enable_if<!CompiledWithCuda>::type* = nullptr,
155 typename... HDV>
156 void LaunchCUDA(Functor _func, HDV*...) const {
157 // Remove unused parameter compiler warning.
158 (void) _func;
159
160 LOG(FATAL) << "Not part of device code. WITH_CUDA: " << WITH_CUDA();
161 }
162#endif // defined(__CUDACC__)
163
164 template <typename... HDV>
165 void LaunchCPU(Functor func, HDV *...vectors) const {
166 omp_ulong end = static_cast<omp_ulong>(*(range_.end()));
167 SyncHost(vectors...);
168 ParallelFor(end, n_threads_, [&](omp_ulong idx) { func(idx, UnpackHDV(vectors)...); });
169 }
170
171 private:
173 Functor func_;
175 Range range_;
176 int32_t n_threads_;
177 int32_t device_;
178 };
179
180 public:
193 template <typename Functor>
194 static Evaluator<Functor> Init(Functor func, Range const range, int32_t n_threads,
195 int32_t device_idx) {
196 return Evaluator<Functor>{func, std::move(range), n_threads, device_idx};
197 }
198};
199
200} // namespace common
201} // namespace xgboost
202
203#endif // XGBOOST_COMMON_TRANSFORM_H_
Definition host_device_vector.h:87
Definition common.h:98
span class implementation, based on ISO C++20 span<T>. The interface should be the same.
Definition span.h:424
Do Transformation on HostDeviceVectors.
Definition transform.h:58
static Evaluator< Functor > Init(Functor func, Range const range, int32_t n_threads, int32_t device_idx)
Initialize a Transform object.
Definition transform.h:194
A device-and-host vector abstraction layer.
Copyright 2015-2023 by XGBoost Contributors.
detail namespace with internal helper functions
Definition json.hpp:249
namespace of xgboost
Definition base.h:90
dmlc::omp_ulong omp_ulong
define unsigned long for openmp loop
Definition base.h:322
header to handle OpenMP compatibility issues
defines some common utility function.
Copyright 2015-2023 by XGBoost Contributors.