Used for learning a tree on a single machine.
More...
#include <serial_tree_learner.h>
|
| SerialTreeLearner (const Config *config) |
|
void | Init (const Dataset *train_data, bool is_constant_hessian) override |
| Initialize tree learner with training dataset.
|
|
void | ResetTrainingData (const Dataset *train_data) override |
|
void | ResetConfig (const Config *config) override |
| Reset tree configs.
|
|
Tree * | Train (const score_t *gradients, const score_t *hessians, bool is_constant_hessian, Json &forced_split_json) override |
| Train a tree model on the dataset.
|
|
Tree * | FitByExistingTree (const Tree *old_tree, const score_t *gradients, const score_t *hessians) const override |
| Use an existing tree to fit the new gradients and hessians.
|
|
Tree * | FitByExistingTree (const Tree *old_tree, const std::vector< int > &leaf_pred, const score_t *gradients, const score_t *hessians) override |
|
void | SetBaggingData (const data_size_t *used_indices, data_size_t num_data) override |
| Set bagging data.
|
|
void | AddPredictionToScore (const Tree *tree, double *out_score) const override |
| Use the last trained tree to predict scores, then add them to out_score.
|
|
void | RenewTreeOutput (Tree *tree, const ObjectiveFunction *obj, const double *prediction, data_size_t total_num_data, const data_size_t *bag_indices, data_size_t bag_cnt) const override |
|
void | RenewTreeOutput (Tree *tree, const ObjectiveFunction *obj, double prediction, data_size_t total_num_data, const data_size_t *bag_indices, data_size_t bag_cnt) const override |
|
virtual | ~TreeLearner () |
| virtual destructor
|
|
TreeLearner & | operator= (const TreeLearner &)=delete |
| Disable copy.
|
|
| TreeLearner (const TreeLearner &)=delete |
| Disable copy.
|
|
|
virtual void | BeforeTrain () |
| Some initial work before training.
|
|
virtual bool | BeforeFindBestSplit (const Tree *tree, int left_leaf, int right_leaf) |
| Some initial work before FindBestSplit.
|
|
virtual void | FindBestSplits () |
|
virtual void | ConstructHistograms (const std::vector< int8_t > &is_feature_used, bool use_subtract) |
|
virtual void | FindBestSplitsFromHistograms (const std::vector< int8_t > &is_feature_used, bool use_subtract) |
|
virtual void | Split (Tree *tree, int best_leaf, int *left_leaf, int *right_leaf) |
| Partition the tree and data according to the best split.
|
|
virtual int32_t | ForceSplits (Tree *tree, Json &forced_split_json, int *left_leaf, int *right_leaf, int *cur_depth, bool *aborted_last_force_split) |
|
virtual data_size_t | GetGlobalDataCountInLeaf (int leaf_idx) const |
| Get the number of data in a leaf.
|
|
|
data_size_t | num_data_ |
| number of data
|
|
int | num_features_ |
| number of features
|
|
const Dataset * | train_data_ |
| training data
|
|
const score_t * | gradients_ |
| gradients of current iteration
|
|
const score_t * | hessians_ |
| hessians of current iteration
|
|
std::unique_ptr< DataPartition > | data_partition_ |
| training data partition on leaves
|
|
Random | random_ |
| Used to generate the set of features to use.
|
|
std::vector< int8_t > | is_feature_used_ |
| Used for sub-feature training; is_feature_used_[i] == false means feature i is not used.
|
|
FeatureHistogram * | parent_leaf_histogram_array_ |
| pointer to histograms array of parent of current leaves
|
|
FeatureHistogram * | smaller_leaf_histogram_array_ |
| pointer to histograms array of smaller leaf
|
|
FeatureHistogram * | larger_leaf_histogram_array_ |
| pointer to histograms array of larger leaf
|
|
std::vector< SplitInfo > | best_split_per_leaf_ |
| store best split points for all leaves
|
|
std::unique_ptr< LeafSplits > | smaller_leaf_splits_ |
| Stores the best thresholds of all features for the smaller leaf.
|
|
std::unique_ptr< LeafSplits > | larger_leaf_splits_ |
| Stores the best thresholds of all features for the larger leaf.
|
|
std::vector< int > | valid_feature_indices_ |
|
std::vector< score_t > | ordered_gradients_ |
| Gradients of the current iteration, ordered for cache optimization.
|
|
std::vector< score_t > | ordered_hessians_ |
| Hessians of the current iteration, ordered for cache optimization.
|
|
std::vector< std::unique_ptr< OrderedBin > > | ordered_bins_ |
| Store ordered bin.
|
|
bool | has_ordered_bin_ = false |
| True if has ordered bin.
|
|
std::vector< char > | is_data_in_leaf_ |
| is_data_in_leaf_[i] != 0 means i-th data is marked
|
|
HistogramPool | histogram_pool_ |
| Used to cache historical histograms to speed up computation.
|
|
const Config * | config_ |
| config of tree learner
|
|
int | num_threads_ |
|
std::vector< int > | ordered_bin_indices_ |
|
bool | is_constant_hessian_ |
|
Used for learning a tree on a single machine.
◆ AddPredictionToScore()
void LightGBM::SerialTreeLearner::AddPredictionToScore |
( |
const Tree * |
tree, |
|
|
double * |
out_score |
|
) |
| const |
|
inline override virtual |
Use the last trained tree to predict scores, then add them to out_score.
- Parameters
-
Implements LightGBM::TreeLearner.
◆ FitByExistingTree() [1/2]
Tree * LightGBM::SerialTreeLearner::FitByExistingTree |
( |
const Tree * |
old_tree, |
|
|
const score_t * |
gradients, |
|
|
const score_t * |
hessians |
|
) |
| const |
|
override virtual |
◆ FitByExistingTree() [2/2]
Tree * LightGBM::SerialTreeLearner::FitByExistingTree |
( |
const Tree * |
old_tree, |
|
|
const std::vector< int > & |
leaf_pred, |
|
|
const score_t * |
gradients, |
|
|
const score_t * |
hessians |
|
) |
| |
|
override virtual |
◆ GetGlobalDataCountInLeaf()
data_size_t LightGBM::SerialTreeLearner::GetGlobalDataCountInLeaf |
( |
int |
leaf_idx | ) |
const |
|
inline protected virtual |
Get the number of data in a leaf.
- Parameters
-
leaf_idx | The index of leaf |
- Returns
- The number of data in the leaf_idx leaf
◆ Init()
void LightGBM::SerialTreeLearner::Init |
( |
const Dataset * |
train_data, |
|
|
bool |
is_constant_hessian |
|
) |
| |
|
override virtual |
Initialize tree learner with training dataset.
- Parameters
-
train_data | The used training data |
is_constant_hessian | True if all hessians share the same value |
Implements LightGBM::TreeLearner.
◆ RenewTreeOutput() [1/2]
◆ RenewTreeOutput() [2/2]
◆ ResetConfig()
void LightGBM::SerialTreeLearner::ResetConfig |
( |
const Config * |
config | ) |
|
|
override virtual |
◆ ResetTrainingData()
void LightGBM::SerialTreeLearner::ResetTrainingData |
( |
const Dataset * |
train_data | ) |
|
|
override virtual |
◆ SetBaggingData()
Set bagging data.
- Parameters
-
used_indices | Used data indices |
num_data | Number of used data |
Implements LightGBM::TreeLearner.
◆ Split()
void LightGBM::SerialTreeLearner::Split |
( |
Tree * |
tree, |
|
|
int |
best_leaf, |
|
|
int * |
left_leaf, |
|
|
int * |
right_leaf |
|
) |
| |
|
protected virtual |
Partition the tree and data according to the best split.
- Parameters
-
tree | Current tree, which will be split in this function. |
best_leaf | The index of the leaf that will be split. |
left_leaf | The index of the left leaf after the split. |
right_leaf | The index of the right leaf after the split. |
◆ Train()
Tree * LightGBM::SerialTreeLearner::Train |
( |
const score_t * |
gradients, |
|
|
const score_t * |
hessians, |
|
|
bool |
is_constant_hessian, |
|
|
Json & |
forced_split_json |
|
) |
| |
|
override virtual |
Train a tree model on the dataset.
- Parameters
-
gradients | The first order gradients |
hessians | The second order gradients |
is_constant_hessian | True if all hessians share the same value |
- Returns
- A trained tree
Implements LightGBM::TreeLearner.
The documentation for this class was generated from the following files:
- External/LightGBM_2.2.3/LightGBM-2.2.3/src/treelearner/serial_tree_learner.h
- External/LightGBM_2.2.3/LightGBM-2.2.3/src/treelearner/serial_tree_learner.cpp