36 template<
typename Func>
37 void Init(
const size_t n_tasks,
size_t n_nodes, Func funcNTask) {
38 left_right_nodes_sizes_.resize(n_nodes);
39 blocks_offsets_.resize(n_nodes+1);
41 blocks_offsets_[0] = 0;
42 for (
size_t i = 1; i < n_nodes+1; ++i) {
43 blocks_offsets_[i] = blocks_offsets_[i-1] + funcNTask(i-1);
46 if (n_tasks > max_n_tasks_) {
47 mem_blocks_.resize(n_tasks);
48 max_n_tasks_ = n_tasks;
56 template <
bool default_left,
bool any_missing,
typename ColumnType,
typename Predicate>
57 inline std::pair<size_t, size_t> PartitionKernel(
ColumnType* p_column,
61 size_t base_rowid, Predicate&& pred) {
62 auto& column = *p_column;
63 size_t* p_left_part = left_part.data();
64 size_t* p_right_part = right_part.data();
65 size_t nleft_elems = 0;
66 size_t nright_elems = 0;
68 auto p_row_indices = row_indices.data();
69 auto n_samples = row_indices.size();
71 for (
size_t i = 0; i < n_samples; ++i) {
72 auto rid = p_row_indices[i];
73 const int32_t bin_id = column[rid - base_rowid];
74 if (any_missing && bin_id == ColumnType::kMissingId) {
76 p_left_part[nleft_elems++] = rid;
78 p_right_part[nright_elems++] = rid;
81 if (pred(rid, bin_id)) {
82 p_left_part[nleft_elems++] = rid;
84 p_right_part[nright_elems++] = rid;
89 return {nleft_elems, nright_elems};
92 template <
typename Pred>
97 size_t* p_left_part = left_part.data();
98 size_t* p_right_part = right_part.data();
99 size_t nleft_elems = 0;
100 size_t nright_elems = 0;
101 for (
auto row_id : ridx) {
103 p_left_part[nleft_elems++] = row_id;
105 p_right_part[nright_elems++] = row_id;
108 return {nleft_elems, nright_elems};
111 template <
typename BinIdxType,
bool any_missing,
bool any_cat,
typename ExpandEntry>
112 void Partition(
const size_t node_in_set, std::vector<ExpandEntry>
const& nodes,
115 const RegTree& tree,
const size_t* rid) {
119 std::size_t nid = nodes[node_in_set].nid;
121 bool default_left = tree.DefaultLeft(nid);
122 bool is_cat = tree.
GetSplitTypes()[nid] == FeatureType::kCategorical;
123 auto node_cats = tree.
NodeCats(nid);
124 auto const& cut_values = gmat.
cut.Values();
126 auto pred_hist = [&](
auto ridx,
auto bin_id) {
127 if (any_cat && is_cat) {
128 auto gidx = gmat.GetGindex(ridx, fid);
129 bool go_left = default_left;
131 go_left =
Decision(node_cats, cut_values[gidx]);
135 return bin_id <= split_cond;
139 auto pred_approx = [&](
auto ridx) {
140 auto gidx = gmat.GetGindex(ridx, fid);
141 bool go_left = default_left;
144 go_left =
Decision(node_cats, cut_values[gidx]);
146 go_left = cut_values[gidx] <= nodes[node_in_set].split.split_value;
152 std::pair<size_t, size_t> child_nodes_sizes;
153 if (!column_matrix.IsInitialized()) {
154 child_nodes_sizes = PartitionRangeKernel(rid_span, left, right, pred_approx);
156 if (column_matrix.GetColumnType(fid) == xgboost::common::kDenseColumn) {
157 auto column = column_matrix.DenseColumn<BinIdxType, any_missing>(fid);
159 child_nodes_sizes = PartitionKernel<true, any_missing>(&column, rid_span, left, right,
162 child_nodes_sizes = PartitionKernel<false, any_missing>(&column, rid_span, left, right,
166 CHECK_EQ(any_missing,
true);
168 column_matrix.SparseColumn<BinIdxType>(fid, rid_span.front() - gmat.
base_rowid);
170 child_nodes_sizes = PartitionKernel<true, any_missing>(&column, rid_span, left, right,
173 child_nodes_sizes = PartitionKernel<false, any_missing>(&column, rid_span, left, right,
179 const size_t n_left = child_nodes_sizes.first;
180 const size_t n_right = child_nodes_sizes.second;
182 SetNLeftElems(node_in_set, range.begin(), n_left);
183 SetNRightElems(node_in_set, range.begin(), n_right);
186 template <
bool any_missing,
typename ColumnType,
typename Predicate>
189 auto& column = *p_column;
190 for (
auto const row_id : row_indices) {
191 auto const bin_id = column[row_id - base_rowid];
192 if (any_missing && bin_id == ColumnType::kMissingId) {
193 missing_bits->Set(row_id - base_rowid);
194 }
else if (pred(row_id, bin_id)) {
195 decision_bits->Set(row_id - base_rowid);
205 template <
typename BinIdxType,
bool any_missing,
bool any_cat,
typename ExpandEntry>
206 void MaskRows(
const size_t node_in_set, std::vector<ExpandEntry>
const& nodes,
211 std::size_t nid = nodes[node_in_set].nid;
213 bool is_cat = tree.
GetSplitTypes()[nid] == FeatureType::kCategorical;
214 auto node_cats = tree.
NodeCats(nid);
215 auto const& cut_values = gmat.
cut.Values();
217 if (!column_matrix.IsInitialized()) {
218 for (
auto row_id : rid_span) {
219 auto gidx = gmat.GetGindex(row_id, fid);
223 go_left =
Decision(node_cats, cut_values[gidx]);
225 go_left = cut_values[gidx] <= nodes[node_in_set].split.split_value;
235 auto pred_hist = [&](
auto ridx,
auto bin_id) {
236 if (any_cat && is_cat) {
237 auto gidx = gmat.GetGindex(ridx, fid);
239 return Decision(node_cats, cut_values[gidx]);
241 return bin_id <= split_cond;
245 if (column_matrix.GetColumnType(fid) == xgboost::common::kDenseColumn) {
246 auto column = column_matrix.DenseColumn<BinIdxType, any_missing>(fid);
247 MaskKernel<any_missing>(&column, rid_span, gmat.
base_rowid, decision_bits, missing_bits,
250 CHECK_EQ(any_missing,
true);
252 column_matrix.SparseColumn<BinIdxType>(fid, rid_span.front() - gmat.
base_rowid);
253 MaskKernel<any_missing>(&column, rid_span, gmat.
base_rowid, decision_bits, missing_bits,
263 template <
typename ExpandEntry>
271 std::size_t nid = nodes[node_in_set].nid;
272 bool default_left = tree.DefaultLeft(nid);
274 auto pred = [&](
auto ridx) {
275 bool go_left = default_left;
276 bool is_missing = missing_bits.Check(ridx - gmat.
base_rowid);
278 go_left = decision_bits.Check(ridx - gmat.
base_rowid);
283 std::pair<size_t, size_t> child_nodes_sizes;
284 child_nodes_sizes = PartitionRangeKernel(rid_span, left, right, pred);
286 const size_t n_left = child_nodes_sizes.first;
287 const size_t n_right = child_nodes_sizes.second;
289 SetNLeftElems(node_in_set, range.begin(), n_left);
290 SetNRightElems(node_in_set, range.begin(), n_right);
294 void AllocateForTask(
size_t id) {
295 if (mem_blocks_[
id].
get() ==
nullptr) {
296 BlockInfo* local_block_ptr =
new BlockInfo;
297 CHECK_NE(local_block_ptr, (BlockInfo*)
nullptr);
298 mem_blocks_[id].reset(local_block_ptr);
302 common::Span<size_t> GetLeftBuffer(
int nid,
size_t begin,
size_t end) {
303 const size_t task_idx = GetTaskIdx(nid, begin);
304 return { mem_blocks_.at(task_idx)->Left(), end - begin };
307 common::Span<size_t> GetRightBuffer(
int nid,
size_t begin,
size_t end) {
308 const size_t task_idx = GetTaskIdx(nid, begin);
309 return { mem_blocks_.at(task_idx)->Right(), end - begin };
312 void SetNLeftElems(
int nid,
size_t begin,
size_t n_left) {
313 size_t task_idx = GetTaskIdx(nid, begin);
314 mem_blocks_.at(task_idx)->n_left = n_left;
317 void SetNRightElems(
int nid,
size_t begin,
size_t n_right) {
318 size_t task_idx = GetTaskIdx(nid, begin);
319 mem_blocks_.at(task_idx)->n_right = n_right;
323 [[nodiscard]] std::size_t GetNLeftElems(
int nid)
const {
324 return left_right_nodes_sizes_[nid].first;
327 [[nodiscard]] std::size_t GetNRightElems(
int nid)
const {
328 return left_right_nodes_sizes_[nid].second;
333 void CalculateRowOffsets() {
334 for (
size_t i = 0; i < blocks_offsets_.size()-1; ++i) {
336 for (
size_t j = blocks_offsets_[i]; j < blocks_offsets_[i+1]; ++j) {
337 mem_blocks_[j]->n_offset_left = n_left;
338 n_left += mem_blocks_[j]->n_left;
341 for (
size_t j = blocks_offsets_[i]; j < blocks_offsets_[i + 1]; ++j) {
342 mem_blocks_[j]->n_offset_right = n_left + n_right;
343 n_right += mem_blocks_[j]->n_right;
345 left_right_nodes_sizes_[i] = {n_left, n_right};
349 void MergeToArray(
int nid,
size_t begin,
size_t* rows_indexes) {
350 size_t task_idx = GetTaskIdx(nid, begin);
352 size_t* left_result = rows_indexes + mem_blocks_[task_idx]->n_offset_left;
353 size_t* right_result = rows_indexes + mem_blocks_[task_idx]->n_offset_right;
355 const size_t* left = mem_blocks_[task_idx]->Left();
356 const size_t* right = mem_blocks_[task_idx]->Right();
358 std::copy_n(left, mem_blocks_[task_idx]->n_left, left_result);
359 std::copy_n(right, mem_blocks_[task_idx]->n_right, right_result);
362 size_t GetTaskIdx(
int nid,
size_t begin) {
363 return blocks_offsets_[nid] + begin / BlockSize;
367 template <
typename Sampledp>
368 void LeafPartition(Context
const* ctx, RegTree
const& tree, RowSetCollection
const& row_set,
369 std::vector<bst_node_t>* p_position, Sampledp sampledp)
const {
370 auto& h_pos = *p_position;
371 h_pos.resize(row_set.Data()->size(), std::numeric_limits<bst_node_t>::max());
373 auto p_begin = row_set.Data()->data();
374 ParallelFor(row_set.Size(), ctx->
Threads(), [&](
size_t i) {
375 auto const& node = row_set[i];
376 if (node.node_id < 0) {
379 CHECK(tree.IsLeaf(node.node_id));
381 size_t ptr_offset = node.end - p_begin;
382 CHECK_LE(ptr_offset, row_set.Data()->size()) << node.node_id;
383 for (
auto idx = node.begin; idx != node.end; ++idx) {
384 h_pos[*idx] = sampledp(*idx) ? ~node.node_id : node.node_id;
395 size_t n_offset_left;
396 size_t n_offset_right;
399 return &left_data_[0];
403 return &right_data_[0];
406 size_t left_data_[BlockSize];
407 size_t right_data_[BlockSize];
409 std::vector<std::pair<size_t, size_t>> left_right_nodes_sizes_;
410 std::vector<size_t> blocks_offsets_;
411 std::vector<std::shared_ptr<BlockInfo>> mem_blocks_;
412 size_t max_n_tasks_ = 0;
std::int32_t Threads() const
Returns the automatically chosen number of threads based on the nthread parameter and the system sett...
Definition context.cc:203