1#ifndef LIGHTGBM_TREELEARNER_SERIAL_TREE_LEARNER_H_
2#define LIGHTGBM_TREELEARNER_SERIAL_TREE_LEARNER_H_
3#include <LightGBM/tree_learner.h>
5#include <LightGBM/utils/random.h>
6#include <LightGBM/utils/array_args.h>
8#include <LightGBM/dataset.h>
9#include <LightGBM/tree.h>
11#include "feature_histogram.hpp"
12#include "split_info.hpp"
13#include "data_partition.hpp"
14#include "leaf_splits.hpp"
24#include <boost/align/aligned_allocator.hpp>
27using namespace json11;
40 void Init(
const Dataset* train_data,
bool is_constant_hessian)
override;
42 void ResetTrainingData(
const Dataset* train_data)
override;
47 Json& forced_split_json)
override;
61 #pragma omp parallel for schedule(static)
63 double output =
static_cast<double>(tree->
LeafOutput(i));
67 out_score[tmp_idx[j]] += output;
89 virtual void FindBestSplits();
91 virtual void ConstructHistograms(
const std::vector<int8_t>& is_feature_used,
bool use_subtract);
93 virtual void FindBestSplitsFromHistograms(
const std::vector<int8_t>& is_feature_used,
bool use_subtract);
102 virtual void Split(
Tree* tree,
int best_leaf,
int* left_leaf,
int* right_leaf);
105 virtual int32_t ForceSplits(
Tree* tree,
Json& forced_split_json,
int* left_leaf,
106 int* right_leaf,
int* cur_depth,
107 bool *aborted_last_force_split);
145 std::vector<int> valid_feature_indices_;
149 std::vector<score_t, boost::alignment::aligned_allocator<score_t, 4096>>
ordered_gradients_;
151 std::vector<score_t, boost::alignment::aligned_allocator<score_t, 4096>>
ordered_hessians_;
170 std::vector<int> ordered_bin_indices_;
171 bool is_constant_hessian_;
The main class of data set, which are used to traning or validation.
Definition dataset.h:278
FeatureHistogram is used to construct and store a histogram for a feature.
Definition feature_histogram.hpp:29
Definition feature_histogram.hpp:646
The interface of Objective Function.
Definition objective_function.h:13
A wrapper for random generator.
Definition random.h:15
Used for learning a tree by single machine.
Definition serial_tree_learner.h:34
std::unique_ptr< LeafSplits > smaller_leaf_splits_
stores best thresholds for all feature for smaller leaf
Definition serial_tree_learner.h:142
const score_t * gradients_
gradients of current iteration
Definition serial_tree_learner.h:122
FeatureHistogram * parent_leaf_histogram_array_
pointer to histograms array of parent of current leaves
Definition serial_tree_learner.h:132
std::unique_ptr< LeafSplits > larger_leaf_splits_
stores best thresholds for all feature for larger leaf
Definition serial_tree_learner.h:144
std::vector< std::unique_ptr< OrderedBin > > ordered_bins_
Store ordered bin.
Definition serial_tree_learner.h:160
std::vector< char > is_data_in_leaf_
is_data_in_leaf_[i] != 0 means i-th data is marked
Definition serial_tree_learner.h:164
bool has_ordered_bin_
True if has ordered bin.
Definition serial_tree_learner.h:162
FeatureHistogram * larger_leaf_histogram_array_
pointer to histograms array of larger leaf
Definition serial_tree_learner.h:136
HistogramPool histogram_pool_
used to cache historical histogram to speed up
Definition serial_tree_learner.h:166
std::vector< SplitInfo > best_split_per_leaf_
store best split points for all leaves
Definition serial_tree_learner.h:139
data_size_t num_data_
number of data
Definition serial_tree_learner.h:116
Tree * FitByExistingTree(const Tree *old_tree, const score_t *gradients, const score_t *hessians) const override
use a existing tree to fit the new gradients and hessians.
Definition serial_tree_learner.cpp:223
void Init(const Dataset *train_data, bool is_constant_hessian) override
Initialize tree learner with training dataset.
Definition serial_tree_learner.cpp:44
Tree * Train(const score_t *gradients, const score_t *hessians, bool is_constant_hessian, Json &forced_split_json) override
training tree model on dataset
Definition serial_tree_learner.cpp:157
void AddPredictionToScore(const Tree *tree, double *out_score) const override
Using last trained tree to predict score then adding to out_score;.
Definition serial_tree_learner.h:58
FeatureHistogram * smaller_leaf_histogram_array_
pointer to histograms array of smaller leaf
Definition serial_tree_learner.h:134
std::unique_ptr< DataPartition > data_partition_
training data partition on leaves
Definition serial_tree_learner.h:126
int num_features_
number of features
Definition serial_tree_learner.h:118
std::vector< int8_t > is_feature_used_
used for sub feature training, is_feature_used_[i] = false means don't used feature i
Definition serial_tree_learner.h:130
void SetBaggingData(const data_size_t *used_indices, data_size_t num_data) override
Set bagging data.
Definition serial_tree_learner.h:54
virtual data_size_t GetGlobalDataCountInLeaf(int leaf_idx) const
Get the number of data in a leaf.
Definition serial_tree_learner.h:174
std::vector< score_t > ordered_hessians_
hessians of current iteration, ordered for cache optimized
Definition serial_tree_learner.h:156
const score_t * hessians_
hessians of current iteration
Definition serial_tree_learner.h:124
const Dataset * train_data_
training data
Definition serial_tree_learner.h:120
Random random_
used for generate used features
Definition serial_tree_learner.h:128
virtual void Split(Tree *tree, int best_leaf, int *left_leaf, int *right_leaf)
Partition tree and data according best split.
Definition serial_tree_learner.cpp:703
void ResetConfig(const Config *config) override
Reset tree configs.
Definition serial_tree_learner.cpp:128
std::vector< score_t > ordered_gradients_
gradients of current iteration, ordered for cache optimized
Definition serial_tree_learner.h:154
virtual void BeforeTrain()
Some initial works before training.
Definition serial_tree_learner.cpp:255
const Config * config_
config of tree learner
Definition serial_tree_learner.h:168
virtual bool BeforeFindBestSplit(const Tree *tree, int left_leaf, int right_leaf)
Some initial works before FindBestSplit.
Definition serial_tree_learner.cpp:348
Interface for tree learner.
Definition tree_learner.h:23
Tree model.
Definition tree.h:20
int num_leaves() const
Get Number of leaves.
Definition tree.h:121
double LeafOutput(int leaf) const
Get the output of one leaf.
Definition tree.h:78
desc and descl2 fields must be written in reStructuredText format
Definition application.h:10
float score_t
Type of score, and gradients.
Definition meta.h:26
int32_t data_size_t
Type of data size, it is better to use signed type.
Definition meta.h:14