Medial Code Documentation
param.h
/*!
 * \file param.h
 * \brief Training parameters and statistics used to support tree construction.
 */
#ifndef XGBOOST_TREE_PARAM_H_
#define XGBOOST_TREE_PARAM_H_

#include <algorithm>
#include <cmath>
#include <cstring>
#include <limits>
#include <string>
#include <vector>

#include "../common/categorical.h"
#include "../common/linalg_op.h"
#include "../common/math.h"
#include "xgboost/data.h"
#include "xgboost/linalg.h"
#include "xgboost/parameter.h"

namespace xgboost {
namespace tree {

/*! \brief training parameters for regression tree */
struct TrainParam : public XGBoostParameter<TrainParam> {
  // learning step size applied to each update
  float learning_rate;
  // minimum loss change required for a split
  float min_split_loss;
  // maximum depth of a tree
  int max_depth;
  // maximum number of leaves
  int max_leaves;
  // if using the histogram-based algorithm, maximum number of bins per feature
  int max_bin;
  // growing policy
  enum TreeGrowPolicy { kDepthWise = 0, kLossGuide = 1 };
  int grow_policy;

  uint32_t max_cat_to_onehot{4};

  bst_bin_t max_cat_threshold{64};

  //----- the remaining parameters are less important ----
  // minimum amount of hessian (weight) allowed in a child
  float min_child_weight;
  // L2 regularization factor
  float reg_lambda;
  // L1 regularization factor
  float reg_alpha;
  // maximum delta update we can add in weight estimation
  // this parameter can be used to stabilize the update
  // default=0 means no constraint on the weight delta
  float max_delta_step;
  // row subsampling ratio
  float subsample;
  // sampling method
  enum SamplingMethod { kUniform = 0, kGradientBased = 1 };
  int sampling_method;
  // column subsampling ratio, resampled at each split (node)
  float colsample_bynode;
  // column subsampling ratio, resampled at each level
  float colsample_bylevel;
  // column subsampling ratio, resampled at each tree construction
  float colsample_bytree;
  // accuracy of the sketch
  float sketch_ratio;
  // option to enable cacheline optimization
  bool cache_opt;
  // whether the refresh updater needs to update the leaf values
  bool refresh_leaf;

  std::vector<int> monotone_constraints;
  // Stored as a JSON string.
  std::string interaction_constraints;

  // ------ From CPU quantile histogram -------.
  // percentage threshold for treating a feature as sparse
  // e.g. 0.2 indicates a feature with fewer than 20% nonzeros is considered sparse
  static constexpr double DftSparseThreshold() { return 0.2; }

  double sparse_threshold{DftSparseThreshold()};

  // declare the parameters
  DMLC_DECLARE_PARAMETER(TrainParam) {
    DMLC_DECLARE_FIELD(learning_rate)
        .set_lower_bound(0.0f)
        .set_default(0.3f)
        .describe("Learning rate (step size) of update.");
    DMLC_DECLARE_FIELD(min_split_loss)
        .set_lower_bound(0.0f)
        .set_default(0.0f)
        .describe("Minimum loss reduction required to make a further partition.");
    DMLC_DECLARE_FIELD(max_depth)
        .set_lower_bound(0)
        .set_default(6)
        .describe(
            "Maximum depth of the tree; 0 indicates no limit; a limit is required "
            "for the depthwise policy.");
    DMLC_DECLARE_FIELD(max_leaves).set_lower_bound(0).set_default(0).describe(
        "Maximum number of leaves; 0 indicates no limit.");
    DMLC_DECLARE_FIELD(max_bin).set_lower_bound(2).set_default(256).describe(
        "If using the histogram-based algorithm, maximum number of bins per feature.");
    DMLC_DECLARE_FIELD(grow_policy)
        .set_default(kDepthWise)
        .add_enum("depthwise", kDepthWise)
        .add_enum("lossguide", kLossGuide)
        .describe(
            "Tree growing policy. 0: favor splitting at nodes closest to the root, "
            "i.e. grow depth-wise. 1: favor splitting at nodes with the highest loss "
            "change. (cf. LightGBM)");
    DMLC_DECLARE_FIELD(max_cat_to_onehot)
        .set_default(4)
        .set_lower_bound(1)
        .describe("Maximum number of categories to use one-hot encoding based split.");
    DMLC_DECLARE_FIELD(max_cat_threshold)
        .set_default(64)
        .set_lower_bound(1)
        .describe(
            "Maximum number of categories considered for a split. Used only by "
            "partition-based splits.");
    DMLC_DECLARE_FIELD(min_child_weight)
        .set_lower_bound(0.0f)
        .set_default(1.0f)
        .describe("Minimum sum of instance weight (hessian) needed in a child.");
    DMLC_DECLARE_FIELD(reg_lambda)
        .set_lower_bound(0.0f)
        .set_default(1.0f)
        .describe("L2 regularization on leaf weight.");
    DMLC_DECLARE_FIELD(reg_alpha)
        .set_lower_bound(0.0f)
        .set_default(0.0f)
        .describe("L1 regularization on leaf weight.");
    DMLC_DECLARE_FIELD(max_delta_step)
        .set_lower_bound(0.0f)
        .set_default(0.0f)
        .describe("Maximum delta step we allow each tree's weight estimate to be. "
                  "If the value is set to 0, it means there is no constraint.");
    DMLC_DECLARE_FIELD(subsample)
        .set_range(0.0f, 1.0f)
        .set_default(1.0f)
        .describe("Row subsample ratio of training instances.");
    DMLC_DECLARE_FIELD(sampling_method)
        .set_default(kUniform)
        .add_enum("uniform", kUniform)
        .add_enum("gradient_based", kGradientBased)
        .describe(
            "Sampling method. 0: select random training instances uniformly. "
            "1: select random training instances with higher probability when the "
            "gradient and hessian are larger. (cf. CatBoost)");
    DMLC_DECLARE_FIELD(colsample_bynode)
        .set_range(0.0f, 1.0f)
        .set_default(1.0f)
        .describe("Subsample ratio of columns, resampled at each node (split).");
    DMLC_DECLARE_FIELD(colsample_bylevel)
        .set_range(0.0f, 1.0f)
        .set_default(1.0f)
        .describe("Subsample ratio of columns, resampled at each level.");
    DMLC_DECLARE_FIELD(colsample_bytree)
        .set_range(0.0f, 1.0f)
        .set_default(1.0f)
        .describe("Subsample ratio of columns, resampled at each tree construction.");
    DMLC_DECLARE_FIELD(sketch_ratio)
        .set_lower_bound(0.0f)
        .set_default(2.0f)
        .describe("EXP Param: Sketch accuracy related parameter of the approximate algorithm.");
    DMLC_DECLARE_FIELD(cache_opt)
        .set_default(true)
        .describe("EXP Param: Cache aware optimization.");
    DMLC_DECLARE_FIELD(refresh_leaf)
        .set_default(true)
        .describe("Whether the refresh updater needs to update leaf values.");
    DMLC_DECLARE_FIELD(monotone_constraints)
        .set_default(std::vector<int>())
        .describe("Constraint of variable monotonicity.");
    DMLC_DECLARE_FIELD(interaction_constraints)
        .set_default("")
        .describe("Constraints for interaction representing permitted interactions. "
                  "The constraints must be specified in the form of a nested list, "
                  "e.g. [[0, 1], [2, 3, 4]], where each inner list is a group of "
                  "indices of features that are allowed to interact with each other. "
                  "See the tutorial for more information.");

    // ------ From CPU quantile histogram -------.
    DMLC_DECLARE_FIELD(sparse_threshold)
        .set_range(0, 1.0)
        .set_default(DftSparseThreshold())
        .describe("Percentage threshold for treating a feature as sparse.");

    // add aliases of parameters
    DMLC_DECLARE_ALIAS(reg_lambda, lambda);
    DMLC_DECLARE_ALIAS(reg_alpha, alpha);
    DMLC_DECLARE_ALIAS(min_split_loss, gamma);
    DMLC_DECLARE_ALIAS(learning_rate, eta);
  }

  /*! \brief given the loss change, whether we need to invoke pruning */
  [[nodiscard]] bool NeedPrune(double loss_chg, int depth) const {
    return loss_chg < this->min_split_loss || (this->max_depth != 0 && depth > this->max_depth);
  }

  [[nodiscard]] bst_node_t MaxNodes() const {
    if (this->max_depth == 0 && this->max_leaves == 0) {
      LOG(FATAL) << "Max leaves and max depth cannot both be unconstrained.";
    }
    bst_node_t n_nodes{0};
    if (this->max_leaves > 0) {
      n_nodes = this->max_leaves * 2 - 1;
    } else {
      // Guard against overflowing bst_node_t.
      CHECK_LE(this->max_depth, 30)
          << "max_depth cannot be greater than 30 as that might generate 2^31 - 1 "
             "nodes.";
      // same as: (1 << (max_depth + 1)) - 1, but avoids 1 << 31, which overflows.
      n_nodes = (1 << this->max_depth) + ((1 << this->max_depth) - 1);
    }
    CHECK_GT(n_nodes, 0);
    return n_nodes;
  }
};
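
// Example (illustrative sketch, not part of the original header): TrainParam is a dmlc
// parameter struct, so it is normally configured from string key/value pairs, and the
// aliases declared above (eta, gamma, lambda, alpha) map onto the canonical fields.
// Assumes the standard xgboost::Args typedef and the UpdateAllowUnknown helper
// inherited from XGBoostParameter; the helper function name is hypothetical.
inline void ExampleConfigureTrainParam() {
  TrainParam param;
  param.UpdateAllowUnknown(Args{{"eta", "0.1"},            // alias of learning_rate
                                {"max_depth", "8"},
                                {"min_child_weight", "2"},
                                {"grow_policy", "lossguide"}});
  // After the update: param.learning_rate == 0.1f, param.max_depth == 8.
}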

// functions for L1 cost
template <typename T1, typename T2>
XGBOOST_DEVICE inline static T1 ThresholdL1(T1 w, T2 alpha) {
  if (w > +alpha) {
    return w - alpha;
  }
  if (w < -alpha) {
    return w + alpha;
  }
  return 0.0;
}
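
// Example (illustrative): ThresholdL1 is the soft-thresholding operator used for the L1
// penalty; it shrinks the gradient sum toward zero by alpha and clamps anything inside
// [-alpha, alpha] to zero. With alpha = 1.0:
//   ThresholdL1( 3.5, 1.0) ==  2.5   (shrunk toward zero)
//   ThresholdL1(-3.5, 1.0) == -2.5
//   ThresholdL1( 0.4, 1.0) ==  0.0   (inside [-alpha, alpha])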

// calculate the cost of the loss function
template <typename TrainingParams, typename T>
XGBOOST_DEVICE inline T CalcGainGivenWeight(const TrainingParams &p, T sum_grad, T sum_hess, T w) {
  return -(static_cast<T>(2.0) * sum_grad * w + (sum_hess + p.reg_lambda) * common::Sqr(w));
}

// calculate weight given the statistics
template <typename TrainingParams, typename T>
XGBOOST_DEVICE inline T CalcWeight(const TrainingParams &p, T sum_grad, T sum_hess) {
  if (sum_hess < p.min_child_weight || sum_hess <= 0.0) {
    return 0.0;
  }
  T dw = -ThresholdL1(sum_grad, p.reg_alpha) / (sum_hess + p.reg_lambda);
  if (p.max_delta_step != 0.0f && std::abs(dw) > p.max_delta_step) {
    dw = std::copysign(p.max_delta_step, dw);
  }
  return dw;
}
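
// Worked example (made-up statistics): CalcWeight implements the closed-form leaf value
// w = -ThresholdL1(G, alpha) / (H + lambda), clipped to +/- max_delta_step when that
// constraint is enabled. With sum_grad = -4.0, sum_hess = 9.0, reg_lambda = 1.0,
// reg_alpha = 0.0 and max_delta_step = 0.0:
//   w = -(-4.0) / (9.0 + 1.0) = 0.4
// With reg_alpha = 1.0 the gradient sum is soft-thresholded first:
//   w = -(-4.0 + 1.0) / (9.0 + 1.0) = 0.3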

// calculate the cost of the loss function
template <typename TrainingParams, typename T>
XGBOOST_DEVICE inline T CalcGain(const TrainingParams &p, T sum_grad, T sum_hess) {
  if (sum_hess < p.min_child_weight || sum_hess <= 0.0) {
    return static_cast<T>(0.0);
  }
  if (p.max_delta_step == 0.0f) {
    if (p.reg_alpha == 0.0f) {
      return common::Sqr(sum_grad) / (sum_hess + p.reg_lambda);
    } else {
      return common::Sqr(ThresholdL1(sum_grad, p.reg_alpha)) /
             (sum_hess + p.reg_lambda);
    }
  } else {
    T w = CalcWeight(p, sum_grad, sum_hess);
    T ret = CalcGainGivenWeight(p, sum_grad, sum_hess, w);
    if (p.reg_alpha == 0.0f) {
      return ret;
    } else {
      return ret + p.reg_alpha * std::abs(w);
    }
  }
}
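
// Continuing the made-up example above (sum_grad = -4.0, sum_hess = 9.0, reg_lambda = 1.0,
// reg_alpha = 0.0, max_delta_step = 0.0), the unconstrained branch reduces to G^2 / (H + lambda):
//   CalcGain = (-4.0)^2 / (9.0 + 1.0) = 1.6
// which agrees with CalcGainGivenWeight evaluated at the optimal weight w = 0.4:
//   -(2 * (-4.0) * 0.4 + (9.0 + 1.0) * 0.4^2) = -(-3.2 + 1.6) = 1.6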

template <typename TrainingParams, typename StatT, typename T = decltype(StatT().GetHess())>
XGBOOST_DEVICE inline T CalcGain(const TrainingParams &p, StatT stat) {
  return CalcGain(p, stat.GetGrad(), stat.GetHess());
}

// Used in GPU code where GradientPair is used for the gradient sum, not GradStats.
template <typename TrainingParams, typename GpairT>
XGBOOST_DEVICE inline float CalcWeight(const TrainingParams &p, GpairT sum_grad) {
  return CalcWeight(p, sum_grad.GetGrad(), sum_grad.GetHess());
}

/*! \brief Calculate weights for a vector leaf, scaled by the learning rate eta. */
inline void CalcWeight(TrainParam const &p, linalg::VectorView<GradientPairPrecise const> grad_sum,
                       float eta, linalg::VectorView<float> out_w) {
  for (bst_target_t i = 0; i < out_w.Size(); ++i) {
    out_w(i) = CalcWeight(p, grad_sum(i).GetGrad(), grad_sum(i).GetHess()) * eta;
  }
}

/*! \brief Calculate weights for a vector leaf. */
inline void CalcWeight(TrainParam const &p, linalg::VectorView<GradientPairPrecise const> grad_sum,
                       linalg::VectorView<float> out_w) {
  return CalcWeight(p, grad_sum, 1.0f, out_w);
}

inline double CalcGainGivenWeight(TrainParam const &p,
                                  linalg::VectorView<GradientPairPrecise const> sum_grad,
                                  linalg::VectorView<float const> weight) {
  double gain{0};
  for (bst_target_t i = 0; i < weight.Size(); ++i) {
    gain += -weight(i) * ThresholdL1(sum_grad(i).GetGrad(), p.reg_alpha);
  }
  return gain;
}

/*! \brief core statistics used for tree construction */
struct XGBOOST_ALIGNAS(16) GradStats {
  using GradType = double;
  /*! \brief sum gradient statistics */
  GradType sum_grad { 0 };
  /*! \brief sum hessian statistics */
  GradType sum_hess { 0 };

 public:
  [[nodiscard]] XGBOOST_DEVICE GradType GetGrad() const { return sum_grad; }
  [[nodiscard]] XGBOOST_DEVICE GradType GetHess() const { return sum_hess; }

  friend std::ostream& operator<<(std::ostream& os, GradStats s) {
    os << s.GetGrad() << "/" << s.GetHess();
    return os;
  }

  XGBOOST_DEVICE GradStats() {
    static_assert(sizeof(GradStats) == 16,
                  "Size of GradStats is not 16 bytes.");
  }

  template <typename GpairT>
  XGBOOST_DEVICE explicit GradStats(const GpairT &sum)
      : sum_grad(sum.GetGrad()), sum_hess(sum.GetHess()) {}
  explicit GradStats(const GradType grad, const GradType hess)
      : sum_grad(grad), sum_hess(hess) {}
  /*! \brief add statistics to the data */
  inline void Add(GradientPair p) { this->Add(p.GetGrad(), p.GetHess()); }

  /*! \brief add statistics to the data */
  inline void Add(const GradStats& b) {
    sum_grad += b.sum_grad;
    sum_hess += b.sum_hess;
  }
  /*! \brief same as add, reduce is used in AllReduce */
  inline static void Reduce(GradStats& a, const GradStats& b) { // NOLINT(*)
    a.Add(b);
  }
  /*! \brief set current value to a - b */
  inline void SetSubstract(const GradStats& a, const GradStats& b) {
    sum_grad = a.sum_grad - b.sum_grad;
    sum_hess = a.sum_hess - b.sum_hess;
  }
  /*! \brief return true if the statistics are empty */
  [[nodiscard]] bool Empty() const { return sum_hess == 0.0; }
  /*! \brief add statistics to the data */
  inline void Add(GradType grad, GradType hess) {
    sum_grad += grad;
    sum_hess += hess;
  }
};
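
// Example (illustrative sketch): GradStats accumulates the per-node sums of gradients and
// hessians that feed CalcWeight/CalcGain, and SetSubstract derives one child's statistics
// as parent minus the other child. All numbers below are made up and the helper function
// name is hypothetical.
inline void ExampleAccumulateGradStats() {
  GradStats parent;
  parent.Add(GradientPair{-1.5f, 2.0f});  // one row's gradient / hessian
  parent.Add(GradientPair{-2.5f, 7.0f});  // parent now holds G = -4, H = 9
  GradStats left{-1.0, 3.0};
  GradStats right;
  right.SetSubstract(parent, left);       // right = parent - left => G = -3, H = 6
}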

// Helper functions for copying gradient statistics, one for vector leaf, another for normal scalar.
template <typename T, typename U>
std::vector<T> &CopyStats(linalg::VectorView<U> const &src, std::vector<T> *dst) {  // NOLINT
  dst->resize(src.Size());
  std::copy(linalg::cbegin(src), linalg::cend(src), dst->begin());
  return *dst;
}

inline GradStats &CopyStats(GradStats const &src, GradStats *dst) {  // NOLINT
  *dst = src;
  return *dst;
}

/*!
 * \brief statistics that is helpful to store
 *        and represent a split solution for the tree
 */
template <typename GradientT>
struct SplitEntryContainer {
  /*! \brief loss change after split this node */
  bst_float loss_chg {0.0f};
  /*! \brief split index */
  bst_feature_t sindex{0};
  bst_float split_value{0.0f};
  std::vector<uint32_t> cat_bits;
  bool is_cat{false};

  GradientT left_sum;
  GradientT right_sum;

  SplitEntryContainer() = default;

  friend std::ostream &operator<<(std::ostream &os, SplitEntryContainer const &s) {
    os << "loss_chg: " << s.loss_chg << "\n"
       << "dft_left: " << s.DefaultLeft() << "\n"
       << "split_index: " << s.SplitIndex() << "\n"
       << "split_value: " << s.split_value << "\n"
       << "is_cat: " << s.is_cat << "\n"
       << "left_sum: " << s.left_sum << "\n"
       << "right_sum: " << s.right_sum << std::endl;
    return os;
  }

  /*!
   * \brief Copy primitive fields into this, and collect cat_bits into a vector.
   */
  void CopyAndCollect(SplitEntryContainer<GradientT> const &that,
                      std::vector<uint32_t> *collected_cat_bits,
                      std::vector<std::size_t> *cat_bits_sizes) {
    loss_chg = that.loss_chg;
    sindex = that.sindex;
    split_value = that.split_value;
    is_cat = that.is_cat;
    static_assert(std::is_trivially_copyable_v<GradientT>);
    left_sum = that.left_sum;
    right_sum = that.right_sum;
    collected_cat_bits->insert(collected_cat_bits->end(), that.cat_bits.cbegin(),
                               that.cat_bits.cend());
    cat_bits_sizes->emplace_back(that.cat_bits.size());
  }

  /*!
   * \brief Copy primitive fields into this, and collect cat_bits and gradient sums into vectors.
   */
  template <typename G>
  void CopyAndCollect(SplitEntryContainer<GradientT> const &that,
                      std::vector<uint32_t> *collected_cat_bits,
                      std::vector<std::size_t> *cat_bits_sizes,
                      std::vector<G> *collected_gradients) {
    loss_chg = that.loss_chg;
    sindex = that.sindex;
    split_value = that.split_value;
    is_cat = that.is_cat;
    collected_cat_bits->insert(collected_cat_bits->end(), that.cat_bits.cbegin(),
                               that.cat_bits.cend());
    cat_bits_sizes->emplace_back(that.cat_bits.size());
    static_assert(!std::is_trivially_copyable_v<GradientT>);
    collected_gradients->insert(collected_gradients->end(), that.left_sum.cbegin(),
                                that.left_sum.cend());
    collected_gradients->insert(collected_gradients->end(), that.right_sum.cbegin(),
                                that.right_sum.cend());
  }

  /*! \brief return the feature index of the split */
  [[nodiscard]] bst_feature_t SplitIndex() const { return sindex & ((1U << 31) - 1U); }
  /*! \brief return whether missing values default to the left branch */
  [[nodiscard]] bool DefaultLeft() const { return (sindex >> 31) != 0; }
  /*!
   * \brief decides whether we can replace current entry with the given statistics
   *
   * Lower feature indices get priority when loss_chg == new_loss_chg, which helps
   * produce consistent results during multi-threaded execution.
   *
   * \param new_loss_chg the loss reduction obtained through the split
   * \param split_index the feature index the split is on
   */
  [[nodiscard]] bool NeedReplace(bst_float new_loss_chg, unsigned split_index) const {
    if (std::isinf(new_loss_chg)) {  // in some cases new_loss_chg can be NaN or Inf,
                                     // for example when lambda = 0 & min_child_weight = 0;
                                     // skip the value in this case
      return false;
    } else if (this->SplitIndex() <= split_index) {
      return new_loss_chg > this->loss_chg;
    } else {
      return !(this->loss_chg > new_loss_chg);
    }
  }
  /*!
   * \brief update the split entry, replace it if e is better
   * \param e candidate split solution
   * \return whether the proposed split is better and replaces the current split
   */
  inline bool Update(const SplitEntryContainer &e) {
    if (this->NeedReplace(e.loss_chg, e.SplitIndex())) {
      this->loss_chg = e.loss_chg;
      this->sindex = e.sindex;
      this->split_value = e.split_value;
      this->is_cat = e.is_cat;
      this->cat_bits = e.cat_bits;
      this->left_sum = e.left_sum;
      this->right_sum = e.right_sum;
      return true;
    } else {
      return false;
    }
  }
  /*!
   * \brief update the split entry, replace it if the new candidate is better
   * \param new_loss_chg loss reduction of the new candidate
   * \param split_index feature index to split on
   * \param new_split_value the split point
   * \param default_left whether missing values go to the left branch
   * \param is_cat whether the split is categorical
   * \param left_sum left statistics
   * \param right_sum right statistics
   * \return whether the proposed split is better and replaces the current split
   */
  template <typename GradientSumT>
  bool Update(bst_float new_loss_chg, bst_feature_t split_index, float new_split_value,
              bool default_left, bool is_cat, GradientSumT const &left_sum,
              GradientSumT const &right_sum) {
    if (this->NeedReplace(new_loss_chg, split_index)) {
      this->loss_chg = new_loss_chg;
      if (default_left) {
        split_index |= (1U << 31);
      }
      this->sindex = split_index;
      this->split_value = new_split_value;
      this->is_cat = is_cat;
      CopyStats(left_sum, &this->left_sum);
      CopyStats(right_sum, &this->right_sum);
      return true;
    } else {
      return false;
    }
  }

  /*! \brief same as update, used by AllReduce */
  inline static void Reduce(SplitEntryContainer &dst,        // NOLINT(*)
                            const SplitEntryContainer &src) {  // NOLINT(*)
    dst.Update(src);
  }
};
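
// Example (illustrative sketch): tree updaters keep one SplitEntryContainer per node and
// feed it candidate splits; Update keeps a candidate only when NeedReplace says it is
// better. The loss changes and statistics below are made up, and the helper function
// name is hypothetical. SplitEntry, defined just below, is the usual GradStats alias.
inline void ExampleSelectBestSplit() {
  SplitEntryContainer<GradStats> best;
  GradStats left{-3.0, 6.0}, right{-1.0, 3.0};
  best.Update(/*new_loss_chg=*/1.2f, /*split_index=*/3, /*new_split_value=*/0.5f,
              /*default_left=*/true, /*is_cat=*/false, left, right);
  best.Update(0.8f, 7, 1.5f, false, false, left, right);  // smaller gain, rejected
  // best.SplitIndex() == 3, best.DefaultLeft() == true, best.loss_chg == 1.2f
}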

using SplitEntry = SplitEntryContainer<GradStats>;
}  // namespace tree

/*!
 * \brief Parse the interaction constraints from string.
 * \param constraint_str String storing the interaction constraints:
 *
 *   Example input string:
 *
 *     "[[1, 2], [3, 4]]"
 *
 * \param p_out Pointer to output
 */
void ParseInteractionConstraint(
    std::string const &constraint_str,
    std::vector<std::vector<xgboost::bst_feature_t>> *p_out);
}  // namespace xgboost

// define string serializer for vector, to get the arguments
namespace std {
inline std::ostream &operator<<(std::ostream &os, const std::vector<int> &t) {
  os << '(';
  for (auto it = t.begin(); it != t.end(); ++it) {
    if (it != t.begin()) {
      os << ',';
    }
    os << *it;
  }
  // python style tuple
  if (t.size() == 1) {
    os << ',';
  }
  os << ')';
  return os;
}
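
// Example (illustrative): the serializer prints the vector as a python-style tuple so that
// vector-valued parameters such as monotone_constraints can be printed and parsed back;
// a single element keeps the trailing comma.
//   std::vector<int>{1, 0, -1}  ->  "(1,0,-1)"
//   std::vector<int>{1}         ->  "(1,)"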

std::istream &operator>>(std::istream &is, std::vector<int> &t);
}  // namespace std

#endif  // XGBOOST_TREE_PARAM_H_