Code Documentation
split_evaluator.h
Go to the documentation of this file.
8#ifndef XGBOOST_TREE_SPLIT_EVALUATOR_H_
9#define XGBOOST_TREE_SPLIT_EVALUATOR_H_
10
#include <dmlc/registry.h>
#include <xgboost/base.h>

#include <algorithm>
#include <limits>
#include <utility>
#include <vector>

#include "../common/math.h"
#include "../common/transform.h"
#include "param.h"
#include "xgboost/context.h"
#include "xgboost/host_device_vector.h"
#include "xgboost/tree_model.h"
25
26namespace xgboost {
27namespace tree {
29 // hist and exact use parent id to calculate constraints.
30 static constexpr bst_node_t kRootParentId =
31 (-1 & static_cast<bst_node_t>((1U << 31) - 1));
32
33 HostDeviceVector<float> lower_bounds_;
34 HostDeviceVector<float> upper_bounds_;
36 int32_t device_;
37 bool has_constraint_;
38
39 public:
40 TreeEvaluator(TrainParam const& p, bst_feature_t n_features, int32_t device) {
41 device_ = device;
42 if (device != Context::kCpuId) {
43 lower_bounds_.SetDevice(device);
44 upper_bounds_.SetDevice(device);
45 monotone_.SetDevice(device);
46 }
47
48 if (p.monotone_constraints.empty()) {
49 monotone_.HostVector().resize(n_features, 0);
50 has_constraint_ = false;
51 } else {
52 CHECK_LE(p.monotone_constraints.size(), n_features)
53 << "The size of monotone constraint should be less or equal to the number of features.";
54 monotone_.HostVector() = p.monotone_constraints;
55 monotone_.HostVector().resize(n_features, 0);
56 // Initialised to some small size, can grow if needed
57 lower_bounds_.Resize(256, -std::numeric_limits<float>::max());
58 upper_bounds_.Resize(256, std::numeric_limits<float>::max());
59 has_constraint_ = true;
60 }
61
62 if (device_ != Context::kCpuId) {
63 // Pull to device early.
64 lower_bounds_.ConstDeviceSpan();
65 upper_bounds_.ConstDeviceSpan();
66 monotone_.ConstDeviceSpan();
67 }
68 }
69
70 template <typename ParamT>
72 const int* constraints;
73 const float* lower;
74 const float* upper;
75 bool has_constraint;
76
77 template <typename GradientSumT>
78 XGBOOST_DEVICE float CalcSplitGain(const ParamT& param, bst_node_t nidx, bst_feature_t fidx,
79 GradientSumT const& left, GradientSumT const& right) const {
80 int constraint = has_constraint ? constraints[fidx] : 0;
81 const float negative_infinity = -std::numeric_limits<float>::infinity();
82 float wleft = this->CalcWeight(nidx, param, left);
83 float wright = this->CalcWeight(nidx, param, right);
84
85 float gain = this->CalcGainGivenWeight(param, left, wleft) +
86 this->CalcGainGivenWeight(param, right, wright);
87
88 if (constraint == 0) {
89 return gain;
90 } else if (constraint > 0) {
91 return wleft <= wright ? gain : negative_infinity;
92 } else {
93 return wleft >= wright ? gain : negative_infinity;
94 }
95 }
96
97 template <typename GradientSumT>
98 XGBOOST_DEVICE float CalcWeight(bst_node_t nodeid, const ParamT &param,
99 GradientSumT const& stats) const {
100 float w = ::xgboost::tree::CalcWeight(param, stats);
101 if (!has_constraint) {
102 return w;
103 }
104
105 if (nodeid == kRootParentId) {
106 return w;
107 } else if (w < lower[nodeid]) {
108 return lower[nodeid];
109 } else if (w > upper[nodeid]) {
110 return upper[nodeid];
111 } else {
112 return w;
113 }
114 }
115
116 template <typename GradientSumT>
117 XGBOOST_DEVICE float CalcWeightCat(ParamT const& param, GradientSumT const& stats) const {
118 // FIXME(jiamingy): This is a temporary solution until we have categorical feature
119 // specific regularization parameters. During sorting we should try to avoid any
120 // regularization.
121 return ::xgboost::tree::CalcWeight(param, stats);
122 }
123
    // Fast floating point division instruction on device
    XGBOOST_DEVICE float Divide(float a, float b) const {
#ifdef __CUDA_ARCH__
      // CUDA's fast (reduced-accuracy) single-precision division intrinsic.
      return __fdividef(a, b);
#else
      // On host, plain IEEE division.
      return a / b;
#endif
    }
132
133 template <typename GradientSumT>
134 XGBOOST_DEVICE float CalcGainGivenWeight(ParamT const& p, GradientSumT const& stats,
135 float w) const {
136 if (stats.GetHess() <= 0) {
137 return .0f;
138 }
139 // Avoiding tree::CalcGainGivenWeight can significantly reduce avg floating point error.
140 if (p.max_delta_step == 0.0f && has_constraint == false) {
141 return Divide(common::Sqr(ThresholdL1(stats.GetGrad(), p.reg_alpha)),
142 (stats.GetHess() + p.reg_lambda));
143 }
144 return tree::CalcGainGivenWeight<ParamT, float>(p, stats.GetGrad(),
145 stats.GetHess(), w);
146 }
147 template <typename GradientSumT>
148 XGBOOST_DEVICE float CalcGain(bst_node_t nid, ParamT const &p,
149 GradientSumT const& stats) const {
150 return this->CalcGainGivenWeight(p, stats, this->CalcWeight(nid, p, stats));
151 }
152 };
153
154 public:
155 /* Get a view to the evaluator that can be passed down to device. */
156 template <typename ParamT = TrainParam> auto GetEvaluator() const {
157 if (device_ != Context::kCpuId) {
158 auto constraints = monotone_.ConstDevicePointer();
159 return SplitEvaluator<ParamT>{constraints, lower_bounds_.ConstDevicePointer(),
160 upper_bounds_.ConstDevicePointer(), has_constraint_};
161 } else {
162 auto constraints = monotone_.ConstHostPointer();
163 return SplitEvaluator<ParamT>{constraints, lower_bounds_.ConstHostPointer(),
164 upper_bounds_.ConstHostPointer(), has_constraint_};
165 }
166 }
167
168 template <bool CompiledWithCuda = WITH_CUDA()>
169 void AddSplit(bst_node_t nodeid, bst_node_t leftid, bst_node_t rightid,
170 bst_feature_t f, float left_weight, float right_weight) {
171 if (!has_constraint_) {
172 return;
173 }
174
175 size_t max_nidx = std::max(leftid, rightid);
176 if (lower_bounds_.Size() <= max_nidx) {
177 lower_bounds_.Resize(max_nidx * 2 + 1, -std::numeric_limits<float>::max());
178 }
179 if (upper_bounds_.Size() <= max_nidx) {
180 upper_bounds_.Resize(max_nidx * 2 + 1, std::numeric_limits<float>::max());
181 }
182
184 [=] XGBOOST_DEVICE(size_t, common::Span<float> lower,
185 common::Span<float> upper,
186 common::Span<int> monotone) {
187 lower[leftid] = lower[nodeid];
188 upper[leftid] = upper[nodeid];
189
190 lower[rightid] = lower[nodeid];
191 upper[rightid] = upper[nodeid];
192 int32_t c = monotone[f];
193 bst_float mid = (left_weight + right_weight) / 2;
194
195 SPAN_CHECK(!common::CheckNAN(mid));
196
197 if (c < 0) {
198 lower[leftid] = mid;
199 upper[rightid] = mid;
200 } else if (c > 0) {
201 upper[leftid] = mid;
202 lower[rightid] = mid;
203 }
204 },
205 common::Range(0, 1), 1, device_)
206 .Eval(&lower_bounds_, &upper_bounds_, &monotone_);
207 }
208};
209
/*! \brief Kind of split applied at a tree node. */
enum SplitType {
  // numerical split
  kNum = 0,
  // onehot encoding based categorical split
  kOneHot = 1,
  // partition-based categorical split
  kPart = 2
};
218} // namespace tree
219} // namespace xgboost
220
221#endif // XGBOOST_TREE_SPLIT_EVALUATOR_H_
Definition host_device_vector.h:87
static Evaluator< Functor > Init(Functor func, Range const range, int32_t n_threads, int32_t device_idx)
Initialize a Transform object.
Definition transform.h:194
Definition split_evaluator.h:28
Copyright 2014-2023, XGBoost Contributors.
A device-and-host vector abstraction layer.
Copyright 2015-2023 by XGBoost Contributors.
#define XGBOOST_DEVICE
Tag function as usable by device.
Definition base.h:64
namespace of xgboost
Definition base.h:90
uint32_t bst_feature_t
Type for data column (feature) index.
Definition base.h:101
std::int32_t bst_node_t
Type for tree node index.
Definition base.h:112
float bst_float
float type, used for storing statistics
Definition base.h:97
Registry utility that helps to build registry singletons.
training parameters for regression tree
Definition param.h:28
Definition split_evaluator.h:71
Copyright 2014-2023 by XGBoost Contributors.
Copyright 2014-2023 by Contributors.