15#include "../gbm/gblinear_model.h"
16#include "../common/random.h"
17#include "../common/threading_utils.h"
25 DMLC_DECLARE_FIELD(top_k)
28 .describe(
"The number of top features to select in 'thrifty' feature_selector. "
29 "The value of zero means using all the features.");
46 double reg_alpha,
double reg_lambda) {
47 if (sum_hess < 1e-5f)
return 0.0f;
48 const double sum_grad_l2 = sum_grad + reg_lambda * w;
49 const double sum_hess_l2 = sum_hess + reg_lambda;
50 const double tmp = w - sum_grad_l2 / sum_hess_l2;
52 return std::max(-(sum_grad_l2 + reg_alpha) / sum_hess_l2, -w);
54 return std::min(-(sum_grad_l2 - reg_alpha) / sum_hess_l2, -w);
67 return -sum_grad / sum_hess;
83 std::vector<GradientPair>
const &gpair,
85 double sum_grad = 0.0, sum_hess = 0.0;
87 auto page = batch.GetView();
88 auto col = page[fidx];
89 const auto ndata =
static_cast<bst_omp_uint>(col.size());
92 auto &p = gpair[col[j].index * num_group + group_idx];
93 if (p.GetHess() < 0.0f)
continue;
94 sum_grad += p.GetGrad() * v;
95 sum_hess += p.GetHess() * v * v;
98 return std::make_pair(sum_grad, sum_hess);
113 int num_group,
int fidx,
114 const std::vector<GradientPair> &gpair,
116 std::vector<double> sum_grad_tloc(ctx->
Threads(), 0.0);
117 std::vector<double> sum_hess_tloc(ctx->
Threads(), 0.0);
120 auto page = batch.GetView();
121 auto col = page[fidx];
122 const auto ndata =
static_cast<bst_omp_uint>(col.size());
123 common::ParallelFor(ndata, ctx->
Threads(), [&](
size_t j) {
124 const bst_float v = col[j].fvalue;
125 auto &p = gpair[col[j].index * num_group + group_idx];
126 if (p.GetHess() < 0.0f) {
129 auto t_idx = omp_get_thread_num();
130 sum_grad_tloc[t_idx] += p.GetGrad() * v;
131 sum_hess_tloc[t_idx] += p.GetHess() * v * v;
135 std::accumulate(sum_grad_tloc.cbegin(), sum_grad_tloc.cend(), 0.0);
137 std::accumulate(sum_hess_tloc.cbegin(), sum_hess_tloc.cend(), 0.0);
138 return std::make_pair(sum_grad, sum_hess);
152 const std::vector<GradientPair> &gpair,
153 DMatrix *p_fmat, int32_t n_threads) {
155 std::vector<double> sum_grad_tloc(n_threads, 0);
156 std::vector<double> sum_hess_tloc(n_threads, 0);
158 common::ParallelFor(ndata, n_threads, [&](
auto i) {
159 auto tid = omp_get_thread_num();
160 auto &p = gpair[i * num_group + group_idx];
161 if (p.GetHess() >= 0.0f) {
162 sum_grad_tloc[tid] += p.GetGrad();
163 sum_hess_tloc[tid] += p.GetHess();
166 double sum_grad = std::accumulate(sum_grad_tloc.cbegin(), sum_grad_tloc.cend(), 0.0);
167 double sum_hess = std::accumulate(sum_hess_tloc.cbegin(), sum_hess_tloc.cend(), 0.0);
168 return std::make_pair(sum_grad, sum_hess);
182 int num_group,
float dw, std::vector<GradientPair> *in_gpair,
184 if (dw == 0.0f)
return;
186 auto page = batch.GetView();
187 auto col = page[fidx];
189 const auto num_row =
static_cast<bst_omp_uint>(col.size());
190 common::ParallelFor(num_row, ctx->
Threads(), [&](
auto j) {
191 GradientPair &p = (*in_gpair)[col[j].index * num_group + group_idx];
192 if (p.GetHess() < 0.0f) return;
193 p += GradientPair(p.GetHess() * col[j].fvalue * dw, 0);
208 float dbias, std::vector<GradientPair> *in_gpair,
210 if (dbias == 0.0f)
return;
212 common::ParallelFor(ndata, ctx->
Threads(), [&](
auto i) {
213 GradientPair &g = (*in_gpair)[i * num_group + group_idx];
214 if (g.GetHess() < 0.0f) return;
215 g += GradientPair(g.GetHess() * dbias, 0);
242 const std::vector<GradientPair> &,
DMatrix *,
float,
float,
int) {}
258 int group_idx,
const std::vector<GradientPair> &gpair,
DMatrix *p_fmat,
259 float alpha,
float lambda) = 0;
267 using FeatureSelector::FeatureSelector;
269 const std::vector<GradientPair> &,
DMatrix *,
float,
float)
override {
270 return iteration % model.learner_model_param->num_feature;
280 using FeatureSelector::FeatureSelector;
282 DMatrix *,
float,
float,
int)
override {
283 if (feat_index_.size() == 0) {
284 feat_index_.resize(model.learner_model_param->num_feature);
285 std::iota(feat_index_.begin(), feat_index_.end(), 0);
287 std::shuffle(feat_index_.begin(), feat_index_.end(), common::GlobalRandom());
291 const std::vector<GradientPair> &,
DMatrix *,
float,
float)
override {
292 return feat_index_[iteration % model.learner_model_param->num_feature];
296 std::vector<bst_uint> feat_index_;
305 using FeatureSelector::FeatureSelector;
307 const std::vector<GradientPair> &,
DMatrix *,
float,
float)
override {
308 return common::GlobalRandom()() % model.learner_model_param->num_feature;
323 using FeatureSelector::FeatureSelector;
325 DMatrix *,
float,
float,
int param)
override {
326 top_k_ =
static_cast<bst_uint>(param);
327 const bst_uint ngroup = model.learner_model_param->num_output_group;
328 if (param <= 0) top_k_ = std::numeric_limits<bst_uint>::max();
329 if (counter_.size() == 0) {
330 counter_.resize(ngroup);
331 gpair_sums_.resize(model.learner_model_param->num_feature * ngroup);
333 for (
bst_uint gid = 0u; gid < ngroup; ++gid) {
339 int group_idx,
const std::vector<GradientPair> &gpair,
340 DMatrix *p_fmat,
float alpha,
float lambda)
override {
342 auto k = counter_[group_idx]++;
344 if (k >= top_k_ || counter_[group_idx] == model.learner_model_param->num_feature)
return -1;
346 const int ngroup = model.learner_model_param->num_output_group;
347 const bst_omp_uint nfeat = model.learner_model_param->num_feature;
349 std::fill(gpair_sums_.begin(), gpair_sums_.end(), std::make_pair(0., 0.));
351 auto page = batch.GetView();
353 const auto col = page[i];
354 const bst_uint ndata = col.size();
355 auto &sums = gpair_sums_[group_idx * nfeat + i];
356 for (bst_uint j = 0u; j < ndata; ++j) {
357 const bst_float v = col[j].fvalue;
358 auto &p = gpair[col[j].index * ngroup + group_idx];
359 if (p.GetHess() < 0.f) continue;
360 sums.first += p.GetGrad() * v;
361 sums.second += p.GetHess() * v * v;
367 double best_weight_update = 0.0f;
369 auto &s = gpair_sums_[group_idx * nfeat + fidx];
370 float dw = std::abs(
static_cast<bst_float>(
371 CoordinateDelta(s.first, s.second, model[fidx][group_idx], alpha, lambda)));
372 if (dw > best_weight_update) {
373 best_weight_update = dw;
382 std::vector<bst_uint> counter_;
383 std::vector<std::pair<double, double>> gpair_sums_;
399 using FeatureSelector::FeatureSelector;
402 const std::vector<GradientPair> &gpair,
DMatrix *p_fmat,
float alpha,
float lambda,
403 int param)
override {
404 top_k_ =
static_cast<bst_uint>(param);
405 if (param <= 0) top_k_ = std::numeric_limits<bst_uint>::max();
406 const bst_uint ngroup = model.learner_model_param->num_output_group;
407 const bst_omp_uint nfeat = model.learner_model_param->num_feature;
409 if (deltaw_.size() == 0) {
410 deltaw_.resize(nfeat * ngroup);
411 sorted_idx_.resize(nfeat * ngroup);
412 counter_.resize(ngroup);
413 gpair_sums_.resize(nfeat * ngroup);
416 std::fill(gpair_sums_.begin(), gpair_sums_.end(), std::make_pair(0., 0.));
418 auto page = batch.GetView();
420 common::ParallelFor(nfeat, ctx->
Threads(), [&](
auto i) {
421 const auto col = page[i];
422 const bst_uint ndata = col.size();
423 for (bst_uint gid = 0u; gid < ngroup; ++gid) {
424 auto &sums = gpair_sums_[gid * nfeat + i];
425 for (bst_uint j = 0u; j < ndata; ++j) {
426 const bst_float v = col[j].fvalue;
427 auto &p = gpair[col[j].index * ngroup + gid];
428 if (p.GetHess() < 0.f) continue;
429 sums.first += p.GetGrad() * v;
430 sums.second += p.GetHess() * v * v;
436 std::fill(deltaw_.begin(), deltaw_.end(), 0.f);
437 std::iota(sorted_idx_.begin(), sorted_idx_.end(), 0);
439 for (
bst_uint gid = 0u; gid < ngroup; ++gid) {
442 auto ii = gid * nfeat + i;
443 auto &s = gpair_sums_[ii];
445 s.first, s.second, model[i][gid], alpha, lambda));
448 auto start = sorted_idx_.begin() + gid * nfeat;
449 std::sort(start, start + nfeat,
450 [pdeltaw](
size_t i,
size_t j) {
451 return std::abs(*(pdeltaw + i)) > std::abs(*(pdeltaw + j));
458 const std::vector<GradientPair> &,
DMatrix *,
float,
float)
override {
460 auto k = counter_[group_idx]++;
462 if (k >= top_k_ || counter_[group_idx] == model.learner_model_param->num_feature)
return -1;
464 const size_t grp_offset = group_idx * model.learner_model_param->num_feature;
465 return static_cast<int>(sorted_idx_[grp_offset + k] - grp_offset);
470 std::vector<bst_float> deltaw_;
471 std::vector<size_t> sorted_idx_;
472 std::vector<bst_uint> counter_;
473 std::vector<std::pair<double, double>> gpair_sums_;
489 LOG(FATAL) <<
"unknown coordinate selector: " << choice;
Internal data structure used by XGBoost during training.
Definition data.h:509
virtual MetaInfo & Info()=0
meta information of the dataset
BatchSet< T > GetBatches()
Gets batches.
Definition gblinear_model.h:44
Deterministic selection by cycling through features one at a time.
Definition coordinate_common.h:265
int NextFeature(Context const *, int iteration, const gbm::GBLinearModel &model, int, const std::vector< GradientPair > &, DMatrix *, float, float) override
Select next coordinate to update.
Definition coordinate_common.h:268
Abstract class for stateful feature selection or ordering in coordinate descent algorithms.
Definition coordinate_common.h:223
virtual int NextFeature(Context const *ctx, int iteration, const gbm::GBLinearModel &model, int group_idx, const std::vector< GradientPair > &gpair, DMatrix *p_fmat, float alpha, float lambda)=0
Select next coordinate to update.
virtual void Setup(Context const *, const gbm::GBLinearModel &, const std::vector< GradientPair > &, DMatrix *, float, float, int)
Setting up the selector state prior to looping through features.
Definition coordinate_common.h:241
virtual ~FeatureSelector()=default
virtual destructor
Select coordinate with the greatest gradient magnitude.
Definition coordinate_common.h:321
void Setup(Context const *, const gbm::GBLinearModel &model, const std::vector< GradientPair > &, DMatrix *, float, float, int param) override
Setting up the selector state prior to looping through features.
Definition coordinate_common.h:324
int NextFeature(Context const *ctx, int, const gbm::GBLinearModel &model, int group_idx, const std::vector< GradientPair > &gpair, DMatrix *p_fmat, float alpha, float lambda) override
Select next coordinate to update.
Definition coordinate_common.h:338
A random (with replacement) coordinate selector.
Definition coordinate_common.h:303
int NextFeature(Context const *, int, const gbm::GBLinearModel &model, int, const std::vector< GradientPair > &, DMatrix *, float, float) override
Select next coordinate to update.
Definition coordinate_common.h:306
Similar to Cyclic but with random feature shuffling prior to each update.
Definition coordinate_common.h:278
int NextFeature(Context const *, int iteration, const gbm::GBLinearModel &model, int, const std::vector< GradientPair > &, DMatrix *, float, float) override
Select next coordinate to update.
Definition coordinate_common.h:290
void Setup(Context const *, const gbm::GBLinearModel &model, const std::vector< GradientPair > &, DMatrix *, float, float, int) override
Setting up the selector state prior to looping through features.
Definition coordinate_common.h:281
Thrifty, approximately-greedy feature selector.
Definition coordinate_common.h:397
int NextFeature(Context const *, int, const gbm::GBLinearModel &model, int group_idx, const std::vector< GradientPair > &, DMatrix *, float, float) override
Select next coordinate to update.
Definition coordinate_common.h:457
void Setup(Context const *ctx, const gbm::GBLinearModel &model, const std::vector< GradientPair > &gpair, DMatrix *p_fmat, float alpha, float lambda, int param) override
Setting up the selector state prior to looping through features.
Definition coordinate_common.h:401
Copyright 2015-2023 by XGBoost Contributors.
macro for using C++11 enum class as DMLC parameter
std::pair< double, double > GetGradientParallel(Context const *ctx, int group_idx, int num_group, int fidx, const std::vector< GradientPair > &gpair, DMatrix *p_fmat)
Get the gradient with respect to a single feature.
Definition coordinate_common.h:112
void UpdateBiasResidualParallel(Context const *ctx, int group_idx, int num_group, float dbias, std::vector< GradientPair > *in_gpair, DMatrix *p_fmat)
Updates the gradient vector based on a change in the bias.
Definition coordinate_common.h:207
std::pair< double, double > GetBiasGradientParallel(int group_idx, int num_group, const std::vector< GradientPair > &gpair, DMatrix *p_fmat, int32_t n_threads)
Get the gradient with respect to the bias.
Definition coordinate_common.h:151
double CoordinateDelta(double sum_grad, double sum_hess, double w, double reg_alpha, double reg_lambda)
Calculate change in weight for a given feature.
Definition coordinate_common.h:45
std::pair< double, double > GetGradient(Context const *ctx, int group_idx, int num_group, bst_feature_t fidx, std::vector< GradientPair > const &gpair, DMatrix *p_fmat)
Get the gradient with respect to a single feature.
Definition coordinate_common.h:81
void UpdateResidualParallel(Context const *ctx, bst_feature_t fidx, int group_idx, int num_group, float dw, std::vector< GradientPair > *in_gpair, DMatrix *p_fmat)
Updates the gradient vector with respect to a change in weight.
Definition coordinate_common.h:181
double CoordinateDeltaBias(double sum_grad, double sum_hess)
Calculate update to bias.
Definition coordinate_common.h:66
namespace of xgboost
Definition base.h:90
uint32_t bst_feature_t
Type for data column (feature) index.
Definition base.h:101
dmlc::omp_uint bst_omp_uint
define unsigned int for openmp loop
Definition base.h:324
uint32_t bst_uint
unsigned integer type used for feature index.
Definition base.h:93
float bst_float
float type, used for storing statistics
Definition base.h:97
Runtime context for XGBoost.
Definition context.h:84
std::int32_t Threads() const
Returns the automatically chosen number of threads based on the nthread parameter and the system settings.
Definition context.cc:203
Definition parameter.h:84
Definition coordinate_common.h:22
Copyright 2014-2023 by XGBoost Contributors.