Medial Code Documentation
Loading...
Searching...
No Matches
hist_util.h
Go to the documentation of this file.
1
7#ifndef XGBOOST_COMMON_HIST_UTIL_H_
8#define XGBOOST_COMMON_HIST_UTIL_H_
9
10#include <algorithm>
11#include <cstdint> // for uint32_t
12#include <limits>
13#include <map>
14#include <memory>
15#include <utility>
16#include <vector>
17
18#include "categorical.h"
19#include "quantile.h"
20#include "row_set.h"
21#include "threading_utils.h"
22#include "xgboost/base.h" // for bst_feature_t, bst_bin_t
23#include "xgboost/data.h"
24
25namespace xgboost {
26class GHistIndexMatrix;
27
28namespace common {
34
// A CSC matrix representing histogram cuts.
// The cut values represent upper bounds of bins containing approximately equal numbers of
// elements.
  bool has_categorical_{false};  // whether any feature is categorical (set via SetCategorical)
  float max_cat_{-1.0f};         // largest categorical value seen; -1 when there is none

 protected:
  // Exchange all state (cut storage and categorical metadata) with `that`.
  // Used by the move constructor and move assignment operator below.
  void Swap(HistogramCuts&& that) noexcept(true) {
    std::swap(cut_values_, that.cut_values_);
    std::swap(cut_ptrs_, that.cut_ptrs_);
    std::swap(min_vals_, that.min_vals_);

    std::swap(has_categorical_, that.has_categorical_);
    std::swap(max_cat_, that.max_cat_);
  }
50
51 void Copy(HistogramCuts const& that) {
52 cut_values_.Resize(that.cut_values_.Size());
53 cut_ptrs_.Resize(that.cut_ptrs_.Size());
54 min_vals_.Resize(that.min_vals_.Size());
55 cut_values_.Copy(that.cut_values_);
56 cut_ptrs_.Copy(that.cut_ptrs_);
57 min_vals_.Copy(that.min_vals_);
58 has_categorical_ = that.has_categorical_;
59 max_cat_ = that.max_cat_;
60 }
61
 public:
  // Cut values for all features concatenated (the CSC `data` array).
  HostDeviceVector<float> cut_values_;  // NOLINT
  // Per-feature offsets into cut_values_ (the CSC indptr; see SetBinOffset's caller contract).
  HostDeviceVector<uint32_t> cut_ptrs_;  // NOLINT
  // storing minimum value in a sketch set.
  HostDeviceVector<float> min_vals_;  // NOLINT
67
69 HistogramCuts(HistogramCuts const& that) { this->Copy(that); }
70
71 HistogramCuts(HistogramCuts&& that) noexcept(true) {
72 this->Swap(std::forward<HistogramCuts>(that));
73 }
74
75 HistogramCuts& operator=(HistogramCuts const& that) {
76 this->Copy(that);
77 return *this;
78 }
79
80 HistogramCuts& operator=(HistogramCuts&& that) noexcept(true) {
81 this->Swap(std::forward<HistogramCuts>(that));
82 return *this;
83 }
84
85 [[nodiscard]] bst_bin_t FeatureBins(bst_feature_t feature) const {
86 return cut_ptrs_.ConstHostVector().at(feature + 1) - cut_ptrs_.ConstHostVector()[feature];
87 }
88
89 std::vector<uint32_t> const& Ptrs() const { return cut_ptrs_.ConstHostVector(); }
90 std::vector<float> const& Values() const { return cut_values_.ConstHostVector(); }
91 std::vector<float> const& MinValues() const { return min_vals_.ConstHostVector(); }
92
  [[nodiscard]] bool HasCategorical() const { return has_categorical_; }
  // Largest categorical value across all features; -1.0f when there is none.
  [[nodiscard]] float MaxCategory() const { return max_cat_; }
  // Set meta info about categorical features.
  void SetCategorical(bool has_cat, float max_cat) {
    has_categorical_ = has_cat;
    max_cat_ = max_cat;
  }

  // Total number of bins across all features (last entry of the CSC indptr).
  // NOTE(review): assumes cut_ptrs_ is non-empty -- back() on an empty vector is UB.
  [[nodiscard]] bst_bin_t TotalBins() const { return cut_ptrs_.ConstHostVector().back(); }
107
108 // Return the index of a cut point that is strictly greater than the input
109 // value, or the last available index if none exists
110 [[nodiscard]] bst_bin_t SearchBin(float value, bst_feature_t column_id,
111 std::vector<uint32_t> const& ptrs,
112 std::vector<float> const& values) const {
113 auto end = ptrs[column_id + 1];
114 auto beg = ptrs[column_id];
115 auto it = std::upper_bound(values.cbegin() + beg, values.cbegin() + end, value);
116 auto idx = it - values.cbegin();
117 idx -= !!(idx == end);
118 return idx;
119 }
120
  // Search the bin index for a numerical feature value.
  [[nodiscard]] bst_bin_t SearchBin(float value, bst_feature_t column_id) const {
    return this->SearchBin(value, column_id, Ptrs(), Values());
  }
  // Search the bin index for a numerical feature, taking the value and feature
  // index from a sparse-matrix entry.
  [[nodiscard]] bst_bin_t SearchBin(Entry const& e) const { return SearchBin(e.fvalue, e.index); }
128
132 [[nodiscard]] bst_bin_t SearchCatBin(float value, bst_feature_t fidx,
133 std::vector<uint32_t> const& ptrs,
134 std::vector<float> const& vals) const {
135 auto end = ptrs.at(fidx + 1) + vals.cbegin();
136 auto beg = ptrs[fidx] + vals.cbegin();
137 // Truncates the value in case it's not perfectly rounded.
138 auto v = static_cast<float>(common::AsCat(value));
139 auto bin_idx = std::lower_bound(beg, end, v) - vals.cbegin();
140 if (bin_idx == ptrs.at(fidx + 1)) {
141 bin_idx -= 1;
142 }
143 return bin_idx;
144 }
145 [[nodiscard]] bst_bin_t SearchCatBin(float value, bst_feature_t fidx) const {
146 auto const& ptrs = this->Ptrs();
147 auto const& vals = this->Values();
148 return this->SearchCatBin(value, fidx, ptrs, vals);
149 }
150 [[nodiscard]] bst_bin_t SearchCatBin(Entry const& e) const {
151 return SearchCatBin(e.fvalue, e.index);
152 }
153
157 static float NumericBinValue(std::vector<std::uint32_t> const& ptrs,
158 std::vector<float> const& vals, std::vector<float> const& mins,
159 bst_feature_t fidx, bst_bin_t bin_idx) {
160 auto lower = static_cast<bst_bin_t>(ptrs[fidx]);
161 if (bin_idx == lower) {
162 return mins[fidx];
163 }
164 return vals[bin_idx - 1];
165 }
166};
167
// Run CPU sketching on a DMatrix to build histogram cuts with up to `max_bins`
// bins per feature.  `use_sorted` selects the sorted-sketch implementation.
// NOTE(review): `hessian` presumably supplies per-row weights for the sketch --
// confirm against the definition in hist_util.cc.
HistogramCuts SketchOnDMatrix(Context const* ctx, DMatrix* m, bst_bin_t max_bins,
                              bool use_sorted = false, Span<float const> hessian = {});
176
// Width in bytes of the integer type used to store compressed bin indices
// (see Index below, which selects a decompression routine from this value).
enum BinTypeSize : uint8_t {
  kUint8BinsTypeSize = 1,
  kUint16BinsTypeSize = 2,
  kUint32BinsTypeSize = 4
};
182
186template <typename Fn>
187auto DispatchBinType(BinTypeSize type, Fn&& fn) {
188 switch (type) {
189 case kUint8BinsTypeSize: {
190 return fn(uint8_t{});
191 }
192 case kUint16BinsTypeSize: {
193 return fn(uint16_t{});
194 }
195 case kUint32BinsTypeSize: {
196 return fn(uint32_t{});
197 }
198 }
199 LOG(FATAL) << "Unreachable";
200 return fn(uint32_t{});
201}
202
// Optionally compressed gradient index.  Stores one bin index per matrix
// entry; for dense data the per-feature bin offset is subtracted so indices
// fit into a narrower integer type (see CompressBin below).
class Index {
 private:
  // Record the storage width and select the matching decompression routine.
  void SetBinTypeSize(BinTypeSize binTypeSize) {
    binTypeSize_ = binTypeSize;
    switch (binTypeSize) {
      case kUint8BinsTypeSize:
        func_ = &GetValueFromUint8;
        break;
      case kUint16BinsTypeSize:
        func_ = &GetValueFromUint16;
        break;
      case kUint32BinsTypeSize:
        func_ = &GetValueFromUint32;
        break;
      default:
        // Unknown width: fail loudly via the (necessarily false) CHECK.
        CHECK(binTypeSize == kUint8BinsTypeSize || binTypeSize == kUint16BinsTypeSize ||
              binTypeSize == kUint32BinsTypeSize);
    }
  }

 public:
  // Inside the compressor, bin_idx is the index for cut value across all features. By
  // subtracting it with starting pointer of each feature, we can reduce it to smaller
  // value and store it with smaller types. Usable only with dense data.
  //
  // For sparse input we have to store an addition feature index (similar to sparse matrix
  // formats like CSR) for each bin in index field to choose the right offset.
  template <typename T>
  struct CompressBin {
    uint32_t const* offsets;  // per-feature starting bin index

    template <typename Bin, typename Feat>
    auto operator()(Bin bin_idx, Feat fidx) const {
      return static_cast<T>(bin_idx - offsets[fidx]);
    }
  };

  // Make a compressor bound to this index's per-feature offsets.
  template <typename T>
  CompressBin<T> MakeCompressor() const {
    uint32_t const* offsets = this->Offset();
    return CompressBin<T>{offsets};
  }

  Index() { SetBinTypeSize(binTypeSize_); }

  // Copying is disabled; only move-assignment is allowed (see below).
  Index(Index const& i) = delete;
  Index& operator=(Index const& i) = delete;
  Index(Index&& i) = delete;

  // Move assignment for lazy initialization.
  Index& operator=(Index&& i) = default;

  // Construct the index as a view over externally owned `data`, interpreted
  // with the given per-entry width.
  Index(Span<std::uint8_t> data, BinTypeSize bin_size) : data_{data} {
    this->SetBinTypeSize(bin_size);
  }

  // Decompress the i-th entry back into a global bin index.
  uint32_t operator[](size_t i) const {
    if (!bin_offset_.empty()) {
      // dense, compressed
      auto fidx = i % bin_offset_.size();
      // restore the index by adding back its feature offset.
      return func_(data_.data(), i) + bin_offset_[fidx];
    } else {
      return func_(data_.data(), i);
    }
  }
  [[nodiscard]] BinTypeSize GetBinTypeSize() const { return binTypeSize_; }
  // Reinterpret the raw byte storage as elements of type T.
  template <typename T>
  T const* data() const {  // NOLINT
    return reinterpret_cast<T const*>(data_.data());
  }
  template <typename T>
  T* data() {  // NOLINT
    return reinterpret_cast<T*>(data_.data());
  }
  [[nodiscard]] std::uint32_t const* Offset() const { return bin_offset_.data(); }
  [[nodiscard]] std::size_t OffsetSize() const { return bin_offset_.size(); }
  // Number of stored entries: total bytes divided by the per-entry width.
  [[nodiscard]] std::size_t Size() const { return data_.size() / (binTypeSize_); }

  // set the offset used in compression, cut_ptrs is the CSC indptr in HistogramCuts
  void SetBinOffset(std::vector<uint32_t> const& cut_ptrs) {
    bin_offset_.resize(cut_ptrs.size() - 1);  // resize to number of features.
    std::copy_n(cut_ptrs.begin(), bin_offset_.size(), bin_offset_.begin());
  }
  // Raw byte-level iteration over the underlying buffer (not decompressed).
  auto begin() const {  // NOLINT
    return data_.data();
  }
  auto end() const {  // NOLINT
    return data_.data() + data_.size();
  }

  auto begin() {  // NOLINT
    return data_.data();
  }
  auto end() {  // NOLINT
    return data_.data() + data_.size();
  }

 private:
  // Functions to decompress the index.
  static uint32_t GetValueFromUint8(uint8_t const* t, size_t i) { return t[i]; }
  static uint32_t GetValueFromUint16(uint8_t const* t, size_t i) {
    return reinterpret_cast<uint16_t const*>(t)[i];
  }
  static uint32_t GetValueFromUint32(uint8_t const* t, size_t i) {
    return reinterpret_cast<uint32_t const*>(t)[i];
  }

  using Func = uint32_t (*)(uint8_t const*, size_t);

  // Non-owning view over the compressed index storage.
  Span<std::uint8_t> data_;
  // starting position of each feature inside the cut values (the indptr of the CSC cut matrix
  // HistogramCuts without the last entry.) Used for bin compression.
  std::vector<uint32_t> bin_offset_;

  BinTypeSize binTypeSize_{kUint8BinsTypeSize};
  // Decompression routine matching binTypeSize_; set by SetBinTypeSize.
  Func func_;
};
334
// Binary-search data[begin, end) for a bin whose global index falls inside the
// half-open range [fidx_begin, fidx_end).  Returns that bin index when found,
// -1 when the value is missing from the searched range.
template <typename GradientIndex>
bst_bin_t XGBOOST_HOST_DEV_INLINE BinarySearchBin(std::size_t begin, std::size_t end,
                                                  GradientIndex const& data,
                                                  bst_feature_t const fidx_begin,
                                                  bst_feature_t const fidx_end) {
  size_t previous_middle = std::numeric_limits<size_t>::max();
  while (end != begin) {
    size_t middle = begin + (end - begin) / 2;
    // `begin = middle` below (not middle + 1) cannot shrink a two-element
    // range, so terminate once the midpoint stops moving.
    if (middle == previous_middle) {
      break;
    }
    previous_middle = middle;

    // index into all the bins
    auto gidx = data[middle];

    if (gidx >= fidx_begin && gidx < fidx_end) {
      // Found the intersection.
      return static_cast<int32_t>(gidx);
    } else if (gidx < fidx_begin) {
      begin = middle;
    } else {
      end = middle;
    }
  }
  // Value is missing
  return -1;
}
363
// A view over one node's histogram: one gradient-pair accumulator per bin.
using GHistRow = Span<xgboost::GradientPairPrecise>;
using ConstGHistRow = Span<xgboost::GradientPairPrecise const>;

// Increment hist as dst += add in the half-open range [begin, end).
void IncrementHist(GHistRow dst, ConstGHistRow add, std::size_t begin, std::size_t end);

// Copy hist from src to dst in the half-open range [begin, end).
void CopyHist(GHistRow dst, const GHistRow src, size_t begin, size_t end);

// Compute subtraction: dst = src1 - src2 in the half-open range [begin, end).
void SubtractionHist(GHistRow dst, const GHistRow src1, const GHistRow src2, size_t begin,
                     size_t end);
382
 public:
  // access histogram for i-th node
  GHistRow operator[](bst_uint nid) const {
    constexpr uint32_t kMax = std::numeric_limits<uint32_t>::max();
    // row_ptr_ maps node id -> slot in data_; kMax is the "not added" sentinel
    // (see AddHistRow), so the node must have been added first.
    const size_t id = row_ptr_.at(nid);
    CHECK_NE(id, kMax);
    // const_cast: the returned view allows mutating histogram storage even
    // through a const collection.
    GradientPairPrecise* ptr = const_cast<GradientPairPrecise*>(data_[id].data());
    return {ptr, nbins_};
  }
396
397 // have we computed a histogram for i-th node?
398 [[nodiscard]] bool RowExists(bst_uint nid) const {
399 const uint32_t k_max = std::numeric_limits<uint32_t>::max();
400 return (nid < row_ptr_.size() && row_ptr_[nid] != k_max);
401 }
  // Initialize the collection for histograms of `n_total_bins` bins each.
  // The node mapping is always reset, but histogram storage is dropped only
  // when the bin count changes, so repeated calls reuse allocations.
  void Init(std::uint32_t n_total_bins) {
    if (nbins_ != n_total_bins) {
      nbins_ = n_total_bins;
      // quite expensive operation, so let's do this only once
      data_.clear();
    }
    row_ptr_.clear();
    n_nodes_added_ = 0;
  }
416
  // create an empty histogram for i-th node
  void AddHistRow(bst_uint nid) {
    constexpr uint32_t kMax = std::numeric_limits<uint32_t>::max();
    // Grow the node -> slot table, marking unseen nodes with the kMax sentinel.
    if (nid >= row_ptr_.size()) {
      row_ptr_.resize(nid + 1, kMax);
    }
    // The node must not have been added before.
    CHECK_EQ(row_ptr_[nid], kMax);

    if (data_.size() < (nid + 1)) {
      data_.resize((nid + 1));
    }

    // Slots are handed out in insertion order; the slot's memory itself is
    // allocated lazily in AllocateData().
    row_ptr_[nid] = n_nodes_added_;
    n_nodes_added_++;
  }
  // allocate thread local memory i-th node
  void AllocateData(bst_uint nid) {
    // Lazy allocation: size the node's histogram to nbins_ zeroed pairs on
    // first use only.
    if (data_[row_ptr_[nid]].size() == 0) {
      data_[row_ptr_[nid]].resize(nbins_, {0, 0});
    }
  }
438
 private:
  // number of bins in each histogram
  uint32_t nbins_ = 0;
  // number of histogram rows (nodes) added so far
  uint32_t n_nodes_added_ = 0;
  // per-slot histogram storage; inner vectors are allocated lazily
  std::vector<std::vector<GradientPairPrecise>> data_;

  // node id -> slot index in data_; uint32 max() sentinel means "not added"
  std::vector<size_t> row_ptr_;
};
449
456 public:
457 void Init(size_t nbins) {
458 if (nbins != nbins_) {
459 hist_buffer_.Init(nbins);
460 nbins_ = nbins;
461 }
462 }
463
  // Add new elements if needed, mark all hists as unused
  // targeted_hists - already allocated hists which should contain final results after Reduce() call
  void Reset(size_t nthreads, size_t nodes, const BlockedSpace2d& space,
             const std::vector<GHistRow>& targeted_hists) {
    hist_buffer_.Init(nbins_);
    tid_nid_to_hist_.clear();
    threads_to_nids_map_.clear();

    targeted_hists_ = targeted_hists;

    // Exactly one target histogram per node is required.
    CHECK_EQ(nodes, targeted_hists.size());

    nodes_ = nodes;
    nthreads_ = nthreads;

    // Order matters: the thread/node map determines how many extra histograms
    // to allocate, which in turn determines the (tid, nid) -> histogram map.
    MatchThreadsToNodes(space);
    AllocateAdditionalHistograms();
    MatchNodeNidPairToHist();

    // Mark every (tid, nid) histogram as untouched.
    hist_was_used_.resize(nthreads * nodes_);
    std::fill(hist_was_used_.begin(), hist_was_used_.end(), static_cast<int>(false));
  }
486
  // Get specified hist, initialize hist by zeros if it wasn't used before
  GHistRow GetInitializedHist(size_t tid, size_t nid) {
    CHECK_LT(nid, nodes_);
    CHECK_LT(tid, nthreads_);

    // idx == -1 means this thread writes directly into the node's target
    // histogram; otherwise it uses an additionally allocated buffer.
    int idx = tid_nid_to_hist_.at({tid, nid});
    if (idx >= 0) {
      hist_buffer_.AllocateData(idx);
    }
    GHistRow hist = idx == -1 ? targeted_hists_[nid] : hist_buffer_[idx];

    // Zero-fill on first use by this (tid, nid) pair.
    if (!hist_was_used_[tid * nodes_ + nid]) {
      std::fill_n(hist.data(), hist.size(), GradientPairPrecise{});
      hist_was_used_[tid * nodes_ + nid] = static_cast<int>(true);
    }

    return hist;
  }
505
  // Reduce bins [begin, end) for nid-node into its target hist across threads.
  // (IncrementHist operates on the half-open range [begin, end); the original
  // comment's "(begin, end]" was inconsistent with that contract.)
  void ReduceHist(size_t nid, size_t begin, size_t end) const {
    CHECK_GT(end, begin);
    CHECK_LT(nid, nodes_);

    GHistRow dst = targeted_hists_[nid];

    bool is_updated = false;
    for (size_t tid = 0; tid < nthreads_; ++tid) {
      if (hist_was_used_[tid * nodes_ + nid]) {
        is_updated = true;

        // idx == -1: the thread already wrote into the target histogram.
        int idx = tid_nid_to_hist_.at({tid, nid});
        GHistRow src = idx == -1 ? targeted_hists_[nid] : hist_buffer_[idx];

        if (dst.data() != src.data()) {
          IncrementHist(dst, src, begin, end);
        }
      }
    }
    if (!is_updated) {
      // In distributed mode - some tree nodes can be empty on local machines,
      // So we need just set local hist by zeros in this case
      std::fill(dst.data() + begin, dst.data() + end, GradientPairPrecise{});
    }
  }
532
533 void MatchThreadsToNodes(const BlockedSpace2d& space) {
534 const size_t space_size = space.Size();
535 const size_t chunck_size = space_size / nthreads_ + !!(space_size % nthreads_);
536
537 threads_to_nids_map_.resize(nthreads_ * nodes_, false);
538
539 for (size_t tid = 0; tid < nthreads_; ++tid) {
540 size_t begin = chunck_size * tid;
541 size_t end = std::min(begin + chunck_size, space_size);
542
543 if (begin < space_size) {
544 size_t nid_begin = space.GetFirstDimension(begin);
545 size_t nid_end = space.GetFirstDimension(end-1);
546
547 for (size_t nid = nid_begin; nid <= nid_end; ++nid) {
548 // true - means thread 'tid' will work to compute partial hist for node 'nid'
549 threads_to_nids_map_[tid * nodes_ + nid] = true;
550 }
551 }
552 }
553 }
554
555 void AllocateAdditionalHistograms() {
556 size_t hist_allocated_additionally = 0;
557
558 for (size_t nid = 0; nid < nodes_; ++nid) {
559 int nthreads_for_nid = 0;
560
561 for (size_t tid = 0; tid < nthreads_; ++tid) {
562 if (threads_to_nids_map_[tid * nodes_ + nid]) {
563 nthreads_for_nid++;
564 }
565 }
566
567 // In distributed mode - some tree nodes can be empty on local machines,
568 // set nthreads_for_nid to 0 in this case.
569 // In another case - allocate additional (nthreads_for_nid - 1) histograms,
570 // because one is already allocated externally (will store final result for the node).
571 hist_allocated_additionally += std::max<int>(0, nthreads_for_nid - 1);
572 }
573
574 for (size_t i = 0; i < hist_allocated_additionally; ++i) {
575 hist_buffer_.AddHistRow(i);
576 }
577 }
578
  // Number of bins in each histogram managed by this builder.
  [[nodiscard]] bst_bin_t TotalBins() const { return nbins_; }
580
581 private:
582 void MatchNodeNidPairToHist() {
583 size_t hist_allocated_additionally = 0;
584
585 for (size_t nid = 0; nid < nodes_; ++nid) {
586 bool first_hist = true;
587 for (size_t tid = 0; tid < nthreads_; ++tid) {
588 if (threads_to_nids_map_[tid * nodes_ + nid]) {
589 if (first_hist) {
590 tid_nid_to_hist_[{tid, nid}] = -1;
591 first_hist = false;
592 } else {
593 tid_nid_to_hist_[{tid, nid}] = hist_allocated_additionally++;
594 }
595 }
596 }
597 }
598 }
599
  // number of bins in each histogram
  size_t nbins_ = 0;
  // number of threads participating in construction
  size_t nthreads_ = 0;
  // number of tree nodes being processed
  size_t nodes_ = 0;
  // storage for the additionally allocated per-thread histograms
  HistCollection hist_buffer_;
  // flat [tid * nodes_ + nid] flags: has the thread touched the histogram yet?
  std::vector<int> hist_was_used_;

  // flat [tid * nodes_ + nid] flags: does thread tid contribute to node nid?
  std::vector<bool> threads_to_nids_map_;
  // externally provided target histogram for each node
  std::vector<GHistRow> targeted_hists_;
  // (tid, nid) -> index into hist_buffer_, or -1 for the node's target histogram
  std::map<std::pair<size_t, size_t>, int> tid_nid_to_hist_;
};
625
// construct a histogram via histogram aggregation
// Accumulates gradient pairs of the rows in `row_indices` into `hist` using
// the quantized matrix `gmat`.  NOTE(review): `any_missing` presumably selects
// the code path for data with/without missing values at compile time --
// confirm against the definition.
template <bool any_missing>
void BuildHist(Span<GradientPair const> gpair, const RowSetCollection::Elem row_indices,
               const GHistIndexMatrix& gmat, GHistRow hist, bool force_read_by_column = false);
630} // namespace common
631} // namespace xgboost
632#endif // XGBOOST_COMMON_HIST_UTIL_H_
Copyright 2020-2023, XGBoost Contributors.
Internal data structure used by XGBoost during training.
Definition data.h:509
preprocessed global index matrix, in CSR format.
Definition gradient_index.h:38
Definition host_device_vector.h:87
Definition threading_utils.h:74
histogram of gradient statistics for multiple nodes
Definition hist_util.h:386
void Init(std::uint32_t n_total_bins)
Initialize histogram collection.
Definition hist_util.h:407
Definition hist_util.h:37
void SetCategorical(bool has_cat, float max_cat)
Set meta info about categorical features.
Definition hist_util.h:101
static float NumericBinValue(std::vector< std::uint32_t > const &ptrs, std::vector< float > const &vals, std::vector< float > const &mins, bst_feature_t fidx, bst_bin_t bin_idx)
Return numerical bin value given bin index.
Definition hist_util.h:157
bst_bin_t SearchBin(Entry const &e) const
Search the bin index for numerical feature.
Definition hist_util.h:127
bst_bin_t SearchCatBin(float value, bst_feature_t fidx, std::vector< uint32_t > const &ptrs, std::vector< float > const &vals) const
Search the bin index for categorical feature.
Definition hist_util.h:132
Optionally compressed gradient index.
Definition hist_util.h:210
Index(Span< std::uint8_t > data, BinTypeSize bin_size)
Construct the index from data.
Definition hist_util.h:268
Index & operator=(Index &&i)=default
Move assignment for lazy initialization.
Stores temporary histograms to compute them in parallel. Supports processing multiple tree-nodes for n...
Definition hist_util.h:455
span class implementation, based on ISO++20 span<T>. The interface should be the same.
Definition span.h:424
Copyright 2015-2023 by XGBoost Contributors.
Copyright 2015-2023 by XGBoost Contributors.
NLOHMANN_BASIC_JSON_TPL_DECLARATION void swap(nlohmann::NLOHMANN_BASIC_JSON_TPL &j1, nlohmann::NLOHMANN_BASIC_JSON_TPL &j2) noexcept(//NOLINT(readability-inconsistent-declaration-parameter-name, cert-dcl58-cpp) is_nothrow_move_constructible< nlohmann::NLOHMANN_BASIC_JSON_TPL >::value &&//NOLINT(misc-redundant-expression) is_nothrow_move_assignable< nlohmann::NLOHMANN_BASIC_JSON_TPL >::value)
exchanges the values of two JSON objects
Definition json.hpp:24418
auto DispatchBinType(BinTypeSize type, Fn &&fn)
Dispatch for bin type, fn is a function that accepts a scalar of the bin type.
Definition hist_util.h:187
HistogramCuts SketchOnDMatrix(Context const *ctx, DMatrix *m, bst_bin_t max_bins, bool use_sorted, Span< float const > hessian)
Run CPU sketching on DMatrix.
Definition hist_util.cc:32
void IncrementHist(GHistRow dst, ConstGHistRow add, std::size_t begin, std::size_t end)
Increment hist as dst += add in range [begin, end)
Definition hist_util.cc:73
void CopyHist(GHistRow dst, const GHistRow src, size_t begin, size_t end)
Copy hist from src to dst in range [begin, end)
Definition hist_util.cc:85
void SubtractionHist(GHistRow dst, const GHistRow src1, const GHistRow src2, size_t begin, size_t end)
Compute Subtraction: dst = src1 - src2 in range [begin, end)
Definition hist_util.cc:97
namespace of xgboost
Definition base.h:90
uint32_t bst_feature_t
Type for data column (feature) index.
Definition base.h:101
uint32_t bst_uint
unsigned integer type used for feature index.
Definition base.h:93
int32_t bst_bin_t
Type for histogram bin index.
Definition base.h:103
Copyright 2014-2023 by XGBoost Contributors.
Copyright 2021-2023 by Contributors.
Runtime context for XGBoost.
Definition context.h:84
Element from a sparse vector.
Definition data.h:216
Definition hist_util.h:238
data structure to store an instance set, a subset of rows (instances) associated with a particular no...
Definition row_set.h:30