34 : partition_builder_{partition_builder}, row_set_collection_{row_set_collection} {
35 decision_storage_.resize(num_row);
37 missing_storage_.resize(num_row);
// Fragment of a member function template; the line carrying the function name and its
// leading parameters is missing from this listing.
// NOTE(review): presumably ColumnSplitHelper::Partition, because a later call
// `column_split_helper_.Partition<BinIdxType, any_missing, any_cat>(...)` passes a matching
// argument list -- confirm against the full header.
//
// Visible flow: clear both storages, run MaskRows over every (node, row-range) task,
// combine the storages with collective::Allreduce, then run PartitionByMask over the
// same task space.
41 template <
typename BinIdxType,
bool any_missing,
bool any_cat,
typename ExpandEntry>
44 std::vector<ExpandEntry>
const& nodes,
45 std::vector<int32_t>
const& split_conditions,
RegTree const* p_tree) {
// Reset every storage element to 0 before any mask is computed.
48 std::fill(decision_storage_.begin(), decision_storage_.end(), 0);
49 std::fill(missing_storage_.begin(), missing_storage_.end(), 0);
// First pass: MaskRows receives pointers to decision_bits_ and missing_bits_.
// NOTE(review): presumably these are bit views over the two storages -- the lines
// that set them up are not visible here.
50 common::ParallelFor2d(space, n_threads, [&](
size_t node_in_set,
common::Range1d r) {
51 const int32_t nid = nodes[node_in_set].nid;
// split_conditions is only indexed when the column matrix is initialized; otherwise 0.
52 bst_bin_t split_cond = column_matrix.IsInitialized() ? split_conditions[node_in_set] : 0;
53 partition_builder_->
MaskRows<BinIdxType, any_missing, any_cat>(
54 node_in_set, nodes, r, split_cond, gmat, column_matrix, *p_tree,
55 (*row_set_collection_)[nid].begin, &decision_bits_, &missing_bits_);
// Combine the storages through the collective: bitwise OR for the decision storage,
// bitwise AND for the missing storage.
59 collective::Allreduce<collective::Operation::kBitwiseOR>(decision_storage_.data(),
60 decision_storage_.size());
61 collective::Allreduce<collective::Operation::kBitwiseAND>(missing_storage_.data(),
62 missing_storage_.size());
// Second pass over the same space: look up the task id for this (node, range start),
// call AllocateForTask for it, then PartitionByMask with the combined bits.
65 common::ParallelFor2d(space, n_threads, [&](
size_t node_in_set,
common::Range1d r) {
66 size_t begin = r.begin();
67 const int32_t nid = nodes[node_in_set].nid;
68 const size_t task_id = partition_builder_->GetTaskIdx(node_in_set, begin);
69 partition_builder_->AllocateForTask(task_id);
70 partition_builder_->
PartitionByMask(node_in_set, nodes, r, gmat, *p_tree,
// The remaining arguments of this call are cut off in the listing.
71 (*row_set_collection_)[nid].begin, decision_bits_,
// Storage that is resized to num_row in the constructor, zero-filled before masking, and
// reduced with collective::Allreduce (bitwise OR) during partitioning.
78 std::vector<BitVector::value_type> decision_storage_{};
// Same lifecycle as decision_storage_, but reduced with bitwise AND.
80 std::vector<BitVector::value_type> missing_storage_{};
// Constructor fragment: the signature line is missing from this listing, so the class
// name and parameter types are not visible here.
// Stores the base row id and the column-split flag, rebuilds the row index buffer with
// num_row entries, and constructs the column split helper.
93 : base_rowid{_base_rowid}, is_col_split_{is_col_split} {
94 row_set_collection_.Clear();
95 std::vector<size_t>& row_indices = *row_set_collection_.Data();
96 row_indices.resize(num_row);
98 std::size_t* p_row_indices = row_indices.data();
// NOTE(review): presumably fills the buffer with consecutive row ids starting at
// base_rowid -- confirm against common::Iota.
99 common::Iota(ctx, p_row_indices, p_row_indices + row_indices.size(), base_rowid);
100 row_set_collection_.Init();
// The helper is handed pointers to this object's partition builder and row set collection.
103 column_split_helper_ =
ColumnSplitHelper{num_row, &partition_builder_, &row_set_collection_};
// For each entry of `nodes`, stores one value into (*split_conditions)[i].
// The visible loop scans the cut values of one feature (indices
// [ptrs[fidx], ptrs[fidx + 1])) and records the index whose cut value equals the
// node's split point.
// NOTE(review): the lines that derive nidx/fidx and give split_cond its initial value
// are missing from this listing, as are the trailing parameters of the signature.
107 template <
typename ExpandEntry>
108 void FindSplitConditions(
const std::vector<ExpandEntry>& nodes,
const RegTree& tree,
110 auto const& ptrs = gmat.
cut.Ptrs();
111 auto const& vals = gmat.
cut.Values();
113 for (std::size_t i = 0; i < nodes.size(); ++i) {
116 float const split_pt = tree.SplitCond(nidx);
117 std::uint32_t
const lower_bound = ptrs[fidx];
118 std::uint32_t
const upper_bound = ptrs[fidx + 1];
// Guard for the cast to bst_bin_t below: the largest candidate index must be below
// the int32_t maximum.
122 CHECK_LT(upper_bound,
static_cast<uint32_t
>(std::numeric_limits<int32_t>::max()));
123 for (
auto bound = lower_bound; bound < upper_bound; ++bound) {
// Exact floating-point equality.
// NOTE(review): presumably safe because split points are taken from these same cut
// values -- confirm.
124 if (split_pt == vals[bound]) {
125 split_cond =
static_cast<bst_bin_t>(bound);
128 (*split_conditions)[i] = split_cond;
// Registers the result of partitioning with the row set collection: for every entry of
// `nodes`, reads the left/right element counts from the partition builder and passes
// them to row_set_collection_.AddSplit together with the node's child ids.
132 template <
typename ExpandEntry>
133 void AddSplitsToRowSet(
const std::vector<ExpandEntry>& nodes,
RegTree const* p_tree) {
134 const size_t n_nodes = nodes.size();
135 for (
unsigned int i = 0; i < n_nodes; ++i) {
136 const int32_t nidx = nodes[i].nid;
137 const size_t n_left = partition_builder_.GetNLeftElems(i);
138 const size_t n_right = partition_builder_.GetNRightElems(i);
// The right child id must be exactly one greater than the left child id.
139 CHECK_EQ(p_tree->LeftChild(nidx) + 1, p_tree->RightChild(nidx));
// The end of this call is cut off in the listing.
140 row_set_collection_.AddSplit(nidx, p_tree->LeftChild(nidx), p_tree->RightChild(nidx), n_left,
// First dispatch level (the function-name line is missing from this listing; the body
// forwards to UpdatePosition overloads). Transposes gmat into a column matrix and, when
// that matrix is initialized, selects the any_cat template argument from
// gmat.cut.HasCategorical().
145 template <
typename ExpandEntry>
147 std::vector<ExpandEntry>
const& nodes,
RegTree const* p_tree) {
148 auto const& column_matrix = gmat.Transpose();
149 if (column_matrix.IsInitialized()) {
150 if (gmat.
cut.HasCategorical()) {
151 this->
template UpdatePosition<true>(ctx, gmat, column_matrix, nodes, p_tree);
153 this->
template UpdatePosition<false>(ctx, gmat, column_matrix, nodes, p_tree);
// NOTE(review): presumably the branch taken when the column matrix is not initialized
// (the `else` line is not visible); it fixes BinIdxType = uint8_t, any_missing = true,
// any_cat = true.
161 this->
template UpdatePosition<uint8_t, true, true>(ctx, gmat, column_matrix, nodes, p_tree);
// Second dispatch level: turns column_matrix.AnyMissing() into the any_missing template
// argument and forwards any_cat unchanged. The function-name line is missing from this
// listing.
165 template <
bool any_cat,
typename ExpandEntry>
168 std::vector<ExpandEntry>
const& nodes,
RegTree const* p_tree) {
169 if (column_matrix.AnyMissing()) {
170 this->
template UpdatePosition<true, any_cat>(ctx, gmat, column_matrix, nodes, p_tree);
172 this->
template UpdatePosition<false, any_cat>(ctx, gmat, column_matrix, nodes, p_tree);
// Third dispatch level: takes the bin index type from the type of `t` and forwards it as
// the first template argument.
// NOTE(review): the construct that supplies `t` is not visible in this listing --
// presumably a type-dispatch callback; confirm against the full header.
176 template <
bool any_missing,
bool any_cat,
typename ExpandEntry>
179 std::vector<ExpandEntry>
const& nodes,
RegTree const* p_tree) {
181 using T = decltype(t);
// The end of this call is cut off in the listing.
182 this->template UpdatePosition<T, any_missing, any_cat>(ctx, gmat, column_matrix, nodes,
// Fully specialised partition routine (function-name line missing from this listing).
// Visible steps:
//   1. look up split conditions when the column matrix is initialized;
//   2. size the task space and initialise the partition builder from per-node row counts;
//   3. partition rows, either through column_split_helper_ or through partition_builder_;
//   4. compute row offsets, merge the per-task results back into the row set buffers;
//   5. record the new left/right splits via AddSplitsToRowSet.
// Several statements (loop headers, branch conditions, closing braces) are absent here.
187 template <
typename BinIdxType,
bool any_missing,
bool any_cat,
typename ExpandEntry>
190 std::vector<ExpandEntry>
const& nodes,
RegTree const* p_tree) {
192 size_t n_nodes = nodes.size();
// Left empty unless the column matrix is initialized; later reads are guarded the same way.
194 std::vector<int32_t> split_conditions;
195 if (column_matrix.IsInitialized()) {
196 split_conditions.resize(n_nodes);
197 FindSplitConditions(nodes, *p_tree, gmat, &split_conditions);
// Callback returning the number of rows held by a node.
// NOTE(review): presumably an argument to the construction of `space`, whose opening
// line is missing.
203 [&](
size_t node_in_set) {
204 int32_t nid = nodes[node_in_set].nid;
205 return row_set_collection_[nid].Size();
207 kPartitionBlockSize);
211 partition_builder_.Init(space.Size(), n_nodes, [&](
size_t node_in_set) {
212 const int32_t nid = nodes[node_in_set].nid;
213 const size_t size = row_set_collection_[nid].Size();
// Number of blocks of kPartitionBlockSize rows, rounded up: the `!!` term adds 1 when
// there is a remainder.
214 const size_t n_tasks = size / kPartitionBlockSize + !!(size % kPartitionBlockSize);
// NOTE(review): presumably the column-split path, selected by is_col_split_ -- the
// branch condition is not visible here.
222 column_split_helper_.Partition<BinIdxType, any_missing, any_cat>(
223 space, ctx->
Threads(), gmat, column_matrix, nodes, split_conditions, p_tree);
// Other path: per-task partitioning done directly by the partition builder (the
// enclosing parallel-loop header is missing from this listing).
226 size_t begin = r.begin();
227 const int32_t nid = nodes[node_in_set].nid;
228 const size_t task_id = partition_builder_.GetTaskIdx(node_in_set, begin);
229 partition_builder_.AllocateForTask(task_id);
230 bst_bin_t split_cond = column_matrix.IsInitialized() ? split_conditions[node_in_set] : 0;
231 partition_builder_.template Partition<BinIdxType, any_missing, any_cat>(
232 node_in_set, nodes, r, split_cond, gmat, column_matrix, *p_tree,
233 row_set_collection_[nid].begin);
239 partition_builder_.CalculateRowOffsets();
244 const int32_t nid = nodes[node_in_set].nid;
// const_cast: `begin` is a pointer-to-const here, but MergeToArray is handed a mutable
// pointer to the node's row buffer.
245 partition_builder_.MergeToArray(node_in_set, r.begin(),
246 const_cast<size_t*>(row_set_collection_[nid].begin));
250 AddSplitsToRowSet(nodes, p_tree);
// Read-only access to the underlying row set collection.
253 [[nodiscard]]
auto const& Partitions()
const {
return row_set_collection_; }
// Number of elements between row_set_collection_.begin() and .end().
255 [[nodiscard]] std::size_t Size()
const {
256 return std::distance(row_set_collection_.begin(), row_set_collection_.end());
// Element access by node index, forwarded to row_set_collection_ (mutable and const).
259 auto& operator[](
bst_node_t nidx) {
return row_set_collection_[nidx]; }
260 auto const& operator[](
bst_node_t nidx)
const {
return row_set_collection_[nidx]; }
// Tail of a const member whose header is missing from this listing. Forwards to
// partition_builder_.LeafPartition with a per-row predicate that is true when hess[idx]
// compares equal to zero.
// NOTE(review): presumably a zero hessian marks a row that was not sampled -- confirm
// against PartitionBuilder::LeafPartition.
263 std::vector<bst_node_t>* p_out_position)
const {
264 partition_builder_.LeafPartition(ctx, tree, this->Partitions(), p_out_position,
265 [&](
size_t idx) ->
bool {
return hess[idx] - .0f == .0f; });
// Tail of a const member whose header is missing from this listing. Forwards to
// partition_builder_.LeafPartition with a zero-hessian predicate, chosen by
// gpair.Shape(1).
270 std::vector<bst_node_t>* p_out_position)
const {
// When gpair.Shape(1) > 1, the predicate is true only if every gradient pair in
// `sample` has a zero hessian.
// NOTE(review): the line defining `sample` is missing -- presumably the slice of gpair
// for row idx.
271 if (gpair.Shape(1) > 1) {
272 partition_builder_.LeafPartition(
273 ctx, tree, this->Partitions(), p_out_position, [&](std::size_t idx) ->
bool {
275 return std::all_of(linalg::cbegin(sample), linalg::cend(sample),
276 [](
GradientPair const& g) {
return g.GetHess() - .0f == .0f; });
// Other case: the predicate tests the single gradient pair s(idx). The line defining
// `s` is missing from this listing.
280 partition_builder_.LeafPartition(
281 ctx, tree, this->Partitions(), p_out_position,
282 [&](std::size_t idx) ->
bool {
return s(idx).GetHess() - .0f == .0f; });
// Tail of a const member whose header is missing from this listing. Forwards to
// partition_builder_.LeafPartition with a predicate that is true when the hessian of
// gpair[idx] compares equal to zero.
287 std::vector<bst_node_t>* p_out_position)
const {
288 partition_builder_.LeafPartition(
289 ctx, tree, this->Partitions(), p_out_position,
290 [&](std::size_t idx) ->
bool {
return gpair[idx].GetHess() - .0f == .0f; });
std::int32_t Threads() const
Returns the automatically chosen number of threads based on the nthread parameter and the system settings.
Definition context.cc:203