Medial Code Documentation
Loading...
Searching...
No Matches
histogram.h
1
4#ifndef XGBOOST_TREE_HIST_HISTOGRAM_H_
5#define XGBOOST_TREE_HIST_HISTOGRAM_H_
6
7#include <algorithm> // for max
8#include <cstddef> // for size_t
9#include <cstdint> // for int32_t
10#include <functional> // for function
11#include <utility> // for move
12#include <vector> // for vector
13
14#include "../../collective/communicator-inl.h" // for Allreduce
15#include "../../collective/communicator.h" // for Operation
16#include "../../common/hist_util.h" // for GHistRow, ParallelGHi...
17#include "../../common/row_set.h" // for RowSetCollection
18#include "../../common/threading_utils.h" // for ParallelFor2d, Range1d, BlockedSpace2d
19#include "../../data/gradient_index.h" // for GHistIndexMatrix
20#include "expand_entry.h" // for MultiExpandEntry, CPUExpandEntry
21#include "hist_cache.h" // for BoundedHistCollection
22#include "param.h" // for HistMakerTrainParam
23#include "xgboost/base.h" // for bst_node_t, bst_target_t, bst_bin_t
24#include "xgboost/context.h" // for Context
25#include "xgboost/data.h" // for BatchIterator, BatchSet
26#include "xgboost/linalg.h" // for MatrixView, All, Vect...
27#include "xgboost/logging.h" // for CHECK_GE
28#include "xgboost/span.h" // for Span
29#include "xgboost/tree_model.h" // for RegTree
30
31namespace xgboost::tree {
35void AssignNodes(RegTree const *p_tree, std::vector<MultiExpandEntry> const &valid_candidates,
36 common::Span<bst_node_t> nodes_to_build, common::Span<bst_node_t> nodes_to_sub);
37
41void AssignNodes(RegTree const *p_tree, std::vector<CPUExpandEntry> const &candidates,
42 common::Span<bst_node_t> nodes_to_build, common::Span<bst_node_t> nodes_to_sub);
43
48 BatchParam param_;
49 int32_t n_threads_{-1};
50 // Whether XGBoost is running in distributed environment.
51 bool is_distributed_{false};
52 bool is_col_split_{false};
53
54 public:
62 void Reset(Context const *ctx, bst_bin_t total_bins, BatchParam const &p, bool is_distributed,
63 bool is_col_split, HistMakerTrainParam const *param) {
64 n_threads_ = ctx->Threads();
65 param_ = p;
66 hist_.Reset(total_bins, param->max_cached_hist_node);
67 buffer_.Init(total_bins);
68 is_distributed_ = is_distributed;
69 is_col_split_ = is_col_split;
70 }
71
72 template <bool any_missing>
73 void BuildLocalHistograms(common::BlockedSpace2d const &space, GHistIndexMatrix const &gidx,
74 std::vector<bst_node_t> const &nodes_to_build,
75 common::RowSetCollection const &row_set_collection,
76 common::Span<GradientPair const> gpair_h, bool force_read_by_column) {
77 // Parallel processing by nodes and data in each node
78 common::ParallelFor2d(space, this->n_threads_, [&](size_t nid_in_set, common::Range1d r) {
79 const auto tid = static_cast<unsigned>(omp_get_thread_num());
80 bst_node_t const nidx = nodes_to_build[nid_in_set];
81 auto elem = row_set_collection[nidx];
82 auto start_of_row_set = std::min(r.begin(), elem.Size());
83 auto end_of_row_set = std::min(r.end(), elem.Size());
84 auto rid_set = common::RowSetCollection::Elem(elem.begin + start_of_row_set,
85 elem.begin + end_of_row_set, nidx);
86 auto hist = buffer_.GetInitializedHist(tid, nid_in_set);
87 if (rid_set.Size() != 0) {
88 common::BuildHist<any_missing>(gpair_h, rid_set, gidx, hist, force_read_by_column);
89 }
90 });
91 }
92
  /**
   * @brief Allocate histogram storage for the nodes to build and to subtract.
   *
   * @param p_tree           Tree under construction; used to look up parent nodes.
   * @param p_nodes_to_build [in,out] Nodes needing a full histogram build. May grow
   *                         when a parent histogram was evicted from the cache.
   * @param p_nodes_to_sub   [in,out] Nodes obtainable via the subtraction trick.
   *                         May shrink correspondingly.
   * @param rearrange        Whether nodes may be rearranged once the cache limit is
   *                         reached (the first builder rearranges; later builders
   *                         reuse the arrangement).
   */
  void AddHistRows(RegTree const *p_tree, std::vector<bst_node_t> *p_nodes_to_build,
                   std::vector<bst_node_t> *p_nodes_to_sub, bool rearrange) {
    CHECK(p_nodes_to_build);
    auto &nodes_to_build = *p_nodes_to_build;
    CHECK(p_nodes_to_sub);
    auto &nodes_to_sub = *p_nodes_to_sub;

    // We first check whether the cache size is already exceeded or about to be exceeded.
    // If not, then we can allocate histograms without clearing the cache and without
    // worrying about missing parent histogram.
    //
    // Otherwise, we need to rearrange the nodes before the allocation to make sure the
    // resulting buffer is contiguous. This is to facilitate efficient allreduce.

    bool can_host = this->hist_.CanHost(nodes_to_build, nodes_to_sub);
    // True if the tree is still within the size of cache limit. Allocate histogram as
    // usual.
    auto cache_is_valid = can_host && !this->hist_.HasExceeded();

    if (!can_host) {
      // Evict everything and mark the cache as having exceeded its limit.
      this->hist_.Clear(true);
    }

    if (!rearrange || cache_is_valid) {
      // If not rearrange, we allocate the histogram as usual, assuming the nodes have
      // been properly arranged by other builders.
      this->hist_.AllocateHistograms(nodes_to_build, nodes_to_sub);
      if (rearrange) {
        CHECK(!this->hist_.HasExceeded());
      }
      return;
    }

    // The cache is full, parent histogram might be removed in previous iterations to
    // save memory.
    std::vector<bst_node_t> can_subtract;
    for (auto const &v : nodes_to_sub) {
      if (this->hist_.HistogramExists(p_tree->Parent(v))) {
        // We can still use the subtraction trick for this node
        can_subtract.push_back(v);
      } else {
        // This node requires a full build
        nodes_to_build.push_back(v);
      }
    }

    nodes_to_sub = std::move(can_subtract);
    this->hist_.AllocateHistograms(nodes_to_build, nodes_to_sub);
  }
146
148 void BuildHist(std::size_t page_idx, common::BlockedSpace2d const &space,
149 GHistIndexMatrix const &gidx, common::RowSetCollection const &row_set_collection,
150 std::vector<bst_node_t> const &nodes_to_build,
151 linalg::VectorView<GradientPair const> gpair, bool force_read_by_column = false) {
152 CHECK(gpair.Contiguous());
153
154 if (page_idx == 0) {
155 // Add the local histogram cache to the parallel buffer before processing the first page.
156 auto n_nodes = nodes_to_build.size();
157 std::vector<common::GHistRow> target_hists(n_nodes);
158 for (size_t i = 0; i < n_nodes; ++i) {
159 auto const nidx = nodes_to_build[i];
160 target_hists[i] = hist_[nidx];
161 }
162 buffer_.Reset(this->n_threads_, n_nodes, space, target_hists);
163 }
164
165 if (gidx.IsDense()) {
166 this->BuildLocalHistograms<false>(space, gidx, nodes_to_build, row_set_collection,
167 gpair.Values(), force_read_by_column);
168 } else {
169 this->BuildLocalHistograms<true>(space, gidx, nodes_to_build, row_set_collection,
170 gpair.Values(), force_read_by_column);
171 }
172 }
173
174 void SyncHistogram(RegTree const *p_tree, std::vector<bst_node_t> const &nodes_to_build,
175 std::vector<bst_node_t> const &nodes_to_trick) {
176 auto n_total_bins = buffer_.TotalBins();
178 nodes_to_build.size(), [&](std::size_t) { return n_total_bins; }, 1024);
179 common::ParallelFor2d(space, this->n_threads_, [&](size_t node, common::Range1d r) {
180 // Merging histograms from each thread.
181 this->buffer_.ReduceHist(node, r.begin(), r.end());
182 });
183 if (is_distributed_ && !is_col_split_) {
184 // The cache is contiguous, we can perform allreduce for all nodes in one go.
185 CHECK(!nodes_to_build.empty());
186 auto first_nidx = nodes_to_build.front();
187 std::size_t n = n_total_bins * nodes_to_build.size() * 2;
188 collective::Allreduce<collective::Operation::kSum>(
189 reinterpret_cast<double *>(this->hist_[first_nidx].data()), n);
190 }
191
192 common::BlockedSpace2d const &subspace =
193 nodes_to_trick.size() == nodes_to_build.size()
194 ? space
195 : common::BlockedSpace2d{nodes_to_trick.size(),
196 [&](std::size_t) { return n_total_bins; }, 1024};
197 common::ParallelFor2d(
198 subspace, this->n_threads_, [&](std::size_t nidx_in_set, common::Range1d r) {
199 auto subtraction_nidx = nodes_to_trick[nidx_in_set];
200 auto parent_id = p_tree->Parent(subtraction_nidx);
201 auto sibling_nidx = p_tree->IsLeftChild(subtraction_nidx) ? p_tree->RightChild(parent_id)
202 : p_tree->LeftChild(parent_id);
203 auto sibling_hist = this->hist_[sibling_nidx];
204 auto parent_hist = this->hist_[parent_id];
205 auto subtract_hist = this->hist_[subtraction_nidx];
206 common::SubtractionHist(subtract_hist, parent_hist, sibling_hist, r.begin(), r.end());
207 });
208 }
209
 public:
  /* Getters for tests. */
  // Read-only access to the bounded histogram cache.
  [[nodiscard]] BoundedHistCollection const &Histogram() const { return hist_; }
  // Mutable access to the bounded histogram cache.
  [[nodiscard]] BoundedHistCollection &Histogram() { return hist_; }
  // Access to the per-thread parallel reduction buffer.
  auto &Buffer() { return buffer_; }
215};
216
217// Construct a work space for building histogram. Eventually we should move this
218// function into histogram builder once hist tree method supports external memory.
219template <typename Partitioner>
220common::BlockedSpace2d ConstructHistSpace(Partitioner const &partitioners,
221 std::vector<bst_node_t> const &nodes_to_build) {
222 // FIXME(jiamingy): Handle different size of space. Right now we use the maximum
223 // partition size for the buffer, which might not be efficient if partition sizes
224 // has significant variance.
225 std::vector<std::size_t> partition_size(nodes_to_build.size(), 0);
226 for (auto const &partition : partitioners) {
227 size_t k = 0;
228 for (auto nidx : nodes_to_build) {
229 auto n_rows_in_node = partition.Partitions()[nidx].Size();
230 partition_size[k] = std::max(partition_size[k], n_rows_in_node);
231 k++;
232 }
233 }
234 common::BlockedSpace2d space{
235 nodes_to_build.size(), [&](size_t nidx_in_set) { return partition_size[nidx_in_set]; }, 256};
236 return space;
237}
238
243 std::vector<HistogramBuilder> target_builders_;
244 Context const *ctx_;
245
246 public:
250 template <typename Partitioner, typename ExpandEntry>
251 void BuildRootHist(DMatrix *p_fmat, RegTree const *p_tree,
252 std::vector<Partitioner> const &partitioners,
253 linalg::MatrixView<GradientPair const> gpair, ExpandEntry const &best,
254 BatchParam const &param, bool force_read_by_column = false) {
255 auto n_targets = p_tree->NumTargets();
256 CHECK_EQ(gpair.Shape(1), n_targets);
257 CHECK_EQ(p_fmat->Info().num_row_, gpair.Shape(0));
258 CHECK_EQ(target_builders_.size(), n_targets);
259 std::vector<bst_node_t> nodes{best.nid};
260 std::vector<bst_node_t> dummy_sub;
261
262 auto space = ConstructHistSpace(partitioners, nodes);
263 for (bst_target_t t{0}; t < n_targets; ++t) {
264 this->target_builders_[t].AddHistRows(p_tree, &nodes, &dummy_sub, false);
265 }
266 CHECK(dummy_sub.empty());
267
268 std::size_t page_idx{0};
269 for (auto const &gidx : p_fmat->GetBatches<GHistIndexMatrix>(ctx_, param)) {
270 for (bst_target_t t{0}; t < n_targets; ++t) {
271 auto t_gpair = gpair.Slice(linalg::All(), t);
272 this->target_builders_[t].BuildHist(page_idx, space, gidx,
273 partitioners[page_idx].Partitions(), nodes, t_gpair,
274 force_read_by_column);
275 }
276 ++page_idx;
277 }
278
279 for (bst_target_t t = 0; t < p_tree->NumTargets(); ++t) {
280 this->target_builders_[t].SyncHistogram(p_tree, nodes, dummy_sub);
281 }
282 }
286 template <typename Partitioner, typename ExpandEntry>
287 void BuildHistLeftRight(DMatrix *p_fmat, RegTree const *p_tree,
288 std::vector<Partitioner> const &partitioners,
289 std::vector<ExpandEntry> const &valid_candidates,
291 bool force_read_by_column = false) {
292 std::vector<bst_node_t> nodes_to_build(valid_candidates.size());
293 std::vector<bst_node_t> nodes_to_sub(valid_candidates.size());
294 AssignNodes(p_tree, valid_candidates, nodes_to_build, nodes_to_sub);
295
296 // use the first builder for getting number of valid nodes.
297 target_builders_.front().AddHistRows(p_tree, &nodes_to_build, &nodes_to_sub, true);
298 CHECK_GE(nodes_to_build.size(), nodes_to_sub.size());
299 CHECK_EQ(nodes_to_sub.size() + nodes_to_build.size(), valid_candidates.size() * 2);
300
301 // allocate storage for the rest of the builders
302 for (bst_target_t t = 1; t < target_builders_.size(); ++t) {
303 target_builders_[t].AddHistRows(p_tree, &nodes_to_build, &nodes_to_sub, false);
304 }
305
306 auto space = ConstructHistSpace(partitioners, nodes_to_build);
307 std::size_t page_idx{0};
308 for (auto const &page : p_fmat->GetBatches<GHistIndexMatrix>(ctx_, param)) {
309 CHECK_EQ(gpair.Shape(1), p_tree->NumTargets());
310 for (bst_target_t t = 0; t < p_tree->NumTargets(); ++t) {
311 auto t_gpair = gpair.Slice(linalg::All(), t);
312 CHECK_EQ(t_gpair.Shape(0), p_fmat->Info().num_row_);
313 this->target_builders_[t].BuildHist(page_idx, space, page,
314 partitioners[page_idx].Partitions(), nodes_to_build,
315 t_gpair, force_read_by_column);
316 }
317 page_idx++;
318 }
319
320 for (bst_target_t t = 0; t < p_tree->NumTargets(); ++t) {
321 this->target_builders_[t].SyncHistogram(p_tree, nodes_to_build, nodes_to_sub);
322 }
323 }
324
  // Read-only histogram accessor for target `t` (used by tests).
  [[nodiscard]] auto const &Histogram(bst_target_t t) const {
    return target_builders_[t].Histogram();
  }
  // Mutable histogram accessor for target `t` (used by tests).
  [[nodiscard]] auto &Histogram(bst_target_t t) { return target_builders_[t].Histogram(); }
329
330 void Reset(Context const *ctx, bst_bin_t total_bins, bst_target_t n_targets, BatchParam const &p,
331 bool is_distributed, bool is_col_split, HistMakerTrainParam const *param) {
332 ctx_ = ctx;
333 target_builders_.resize(n_targets);
334 CHECK_GE(n_targets, 1);
335 for (auto &v : target_builders_) {
336 v.Reset(ctx, total_bins, p, is_distributed, is_col_split, param);
337 }
338 }
339};
340} // namespace xgboost::tree
341#endif // XGBOOST_TREE_HIST_HISTOGRAM_H_
Internal data structure used by XGBoost during training.
Definition data.h:509
virtual MetaInfo & Info()=0
meta information of the dataset
BatchSet< T > GetBatches()
Gets batches.
preprocessed global index matrix, in CSR format.
Definition gradient_index.h:38
uint64_t num_row_
number of rows in the data
Definition data.h:54
define regression tree to be the most common tree model.
Definition tree_model.h:158
bst_target_t NumTargets() const
The size of leaf weight.
Definition tree_model.h:481
Definition threading_utils.h:74
Stores temporary histograms to compute them in parallel Supports processing multiple tree-nodes for n...
Definition hist_util.h:455
Definition threading_utils.h:39
collection of rowset
Definition row_set.h:19
span class implementation, based on ISO++20 span<T>. The interface should be the same.
Definition span.h:424
A tensor view with static type and dimension.
Definition linalg.h:293
LINALG_HD auto Slice(S &&...slices) const
Slice the tensor.
Definition linalg.h:506
LINALG_HD auto Values() const -> decltype(data_) const &
Obtain a reference to the raw data.
Definition linalg.h:563
LINALG_HD bool Contiguous() const
Whether this is a contiguous array, both C and F contiguous returns true.
Definition linalg.h:537
A persistent cache for CPU histogram.
Definition hist_cache.h:30
void AllocateHistograms(common::Span< bst_node_t const > nodes_to_build, common::Span< bst_node_t const > nodes_to_sub)
Allocate histogram buffers for all nodes.
Definition hist_cache.h:83
void Clear(bool exceeded)
Clear the cache, mark whether the cache is exceeded the limit.
Definition hist_cache.h:65
Definition histogram.h:44
void BuildHist(std::size_t page_idx, common::BlockedSpace2d const &space, GHistIndexMatrix const &gidx, common::RowSetCollection const &row_set_collection, std::vector< bst_node_t > const &nodes_to_build, linalg::VectorView< GradientPair const > gpair, bool force_read_by_column=false)
Main entry point of this class, build histogram for tree nodes.
Definition histogram.h:148
void Reset(Context const *ctx, bst_bin_t total_bins, BatchParam const &p, bool is_distributed, bool is_col_split, HistMakerTrainParam const *param)
Reset the builder, should be called before growing a new tree.
Definition histogram.h:62
void AddHistRows(RegTree const *p_tree, std::vector< bst_node_t > *p_nodes_to_build, std::vector< bst_node_t > *p_nodes_to_sub, bool rearrange)
Allocate histogram, rearrange the nodes if rearrange is true and the tree has reached the cache size ...
Definition histogram.h:97
Histogram builder that can handle multiple targets.
Definition histogram.h:242
void BuildHistLeftRight(DMatrix *p_fmat, RegTree const *p_tree, std::vector< Partitioner > const &partitioners, std::vector< ExpandEntry > const &valid_candidates, linalg::MatrixView< GradientPair const > gpair, BatchParam const &param, bool force_read_by_column=false)
Build histogram for left and right child of valid candidates.
Definition histogram.h:287
void BuildRootHist(DMatrix *p_fmat, RegTree const *p_tree, std::vector< Partitioner > const &partitioners, linalg::MatrixView< GradientPair const > gpair, ExpandEntry const &best, BatchParam const &param, bool force_read_by_column=false)
Build the histogram for root node.
Definition histogram.h:251
Copyright 2014-2023, XGBoost Contributors.
Copyright 2015-2023 by XGBoost Contributors.
Copyright 2015-2023 by XGBoost Contributors.
defines console logging options for xgboost. Use to enforce unified print behavior.
Copyright 2021-2023 by XGBoost Contributors.
void SubtractionHist(GHistRow dst, const GHistRow src1, const GHistRow src2, size_t begin, size_t end)
Compute Subtraction: dst = src1 - src2 in range [begin, end)
Definition hist_util.cc:97
constexpr detail::AllTag All()
Specify all elements in the axis for slicing.
Definition linalg.h:265
Copyright 2021-2023 by XGBoost Contributors.
Definition tree_updater.h:25
void AssignNodes(RegTree const *p_tree, std::vector< MultiExpandEntry > const &valid_candidates, common::Span< bst_node_t > nodes_to_build, common::Span< bst_node_t > nodes_to_sub)
Decide which node as the build node for multi-target trees.
Definition histogram.cc:18
std::int32_t bst_node_t
Type for tree node index.
Definition base.h:112
std::uint32_t bst_target_t
Type for indexing into output targets.
Definition base.h:118
int32_t bst_bin_t
Type for histogram bin index.
Definition base.h:103
Parameters for constructing histogram index batches.
Definition data.h:244
Runtime context for XGBoost.
Definition context.h:84
std::int32_t Threads() const
Returns the automatically chosen number of threads based on the nthread parameter and the system sett...
Definition context.cc:203
data structure to store an instance set, a subset of rows (instances) associated with a particular no...
Definition row_set.h:30
Copyright 2014-2023 by XGBoost Contributors.
Copyright 2014-2023 by Contributors.