Medial Code Documentation
|
GBDT algorithm implementation, including training, prediction, and bagging. More...
#include <gbdt.h>
Public Member Functions | |
GBDT () | |
Constructor. | |
~GBDT () | |
Destructor. | |
void | Init (const Config *gbdt_config, const Dataset *train_data, const ObjectiveFunction *objective_function, const std::vector< const Metric * > &training_metrics) override |
Initialization logic. | |
void | MergeFrom (const Boosting *other) override |
Merge model from other boosting object. Will insert to the front of current boosting object. | |
void | ShuffleModels (int start_iter, int end_iter) override |
Shuffle Existing Models. | |
void | ResetTrainingData (const Dataset *train_data, const ObjectiveFunction *objective_function, const std::vector< const Metric * > &training_metrics) override |
Reset the training data. | |
void | ResetConfig (const Config *gbdt_config) override |
Reset Boosting Config. | |
void | AddValidDataset (const Dataset *valid_data, const std::vector< const Metric * > &valid_metrics) override |
Adding a validation dataset. | |
void | Train (int snapshot_freq, const std::string &model_output_path) override |
Perform a full training procedure. | |
void | RefitTree (const std::vector< std::vector< int > > &tree_leaf_prediction) override |
Update the tree output by new training data. | |
virtual bool | TrainOneIter (const score_t *gradients, const score_t *hessians) override |
Training logic. | |
void | RollbackOneIter () override |
Rollback one iteration. | |
int | GetCurrentIteration () const override |
Get current iteration. | |
bool | NeedAccuratePrediction () const override |
Can use early stopping for prediction or not. | |
std::vector< double > | GetEvalAt (int data_idx) const override |
Get evaluation result at data_idx data. | |
virtual const double * | GetTrainingScore (int64_t *out_len) override |
Get current training score. | |
virtual int64_t | GetNumPredictAt (int data_idx) const override |
Get size of prediction at data_idx data. | |
void | GetPredictAt (int data_idx, double *out_result, int64_t *out_len) override |
Get prediction result at data_idx data. | |
int | NumPredictOneRow (int num_iteration, bool is_pred_leaf, bool is_pred_contrib) const override |
Get the number of predictions for one data row. | |
void | PredictRaw (const double *features, double *output, const PredictionEarlyStopInstance *earlyStop) const override |
Prediction for one record, not sigmoid transform. | |
void | PredictRawByMap (const std::unordered_map< int, double > &features, double *output, const PredictionEarlyStopInstance *early_stop) const override |
void | Predict (const double *features, double *output, const PredictionEarlyStopInstance *earlyStop) const override |
Prediction for one record, sigmoid transformation will be used if needed. | |
void | PredictByMap (const std::unordered_map< int, double > &features, double *output, const PredictionEarlyStopInstance *early_stop) const override |
void | PredictLeafIndex (const double *features, double *output) const override |
Prediction for one record with leaf index. | |
void | PredictLeafIndexByMap (const std::unordered_map< int, double > &features, double *output) const override |
void | PredictContrib (const double *features, double *output, const PredictionEarlyStopInstance *earlyStop) const override |
Feature contributions for the model's prediction of one record. | |
std::string | DumpModel (int start_iteration, int num_iteration) const override |
Dump model to json format string. | |
std::string | ModelToIfElse (int num_iteration) const override |
Translate model to if-else statement. | |
bool | SaveModelToIfElse (int num_iteration, const char *filename) const override |
Translate model to if-else statement. | |
virtual bool | SaveModelToFile (int start_iteration, int num_iterations, const char *filename) const override |
Save model to file. | |
std::string | SaveModelToString (int num_iterations) |
Save model to string. | |
virtual std::string | SaveModelToString (int start_iteration, int num_iterations) const override |
bool | LoadModelFromString (std::string str) |
Restore from a serialized buffer. | |
bool | LoadModelFromString (const char *buffer, size_t len) override |
std::vector< double > | FeatureImportance (int num_iteration, int importance_type) const override |
Calculate feature importances. | |
int | MaxFeatureIdx () const override |
Get max feature index of this model. | |
std::vector< std::string > | FeatureNames () const override |
Get feature names of this model. | |
int | LabelIdx () const override |
Get index of label column. | |
int | NumberOfTotalModel () const override |
Get number of weak sub-models. | |
int | NumModelPerIteration () const override |
Get number of trees per iteration. | |
int | NumberOfClasses () const override |
Get number of classes. | |
void | InitPredict (int num_iteration, bool is_pred_contrib) override |
Initial work for the prediction. | |
double | GetLeafValue (int tree_idx, int leaf_idx) const override |
void | SetLeafValue (int tree_idx, int leaf_idx, double val) override |
virtual const char * | SubModelName () const override |
Get Type name of this boosting object. | |
![]() | |
virtual | ~Boosting () |
virtual destructor | |
std::string | SaveModelToString (int num_iterations) |
Save model to string. | |
bool | LoadModelFromString (std::string str) |
Restore from a serialized string. | |
Boosting & | operator= (const Boosting &)=delete |
Disable copy. | |
Boosting (const Boosting &)=delete | |
Disable copy. | |
Protected Member Functions | |
virtual bool | EvalAndCheckEarlyStopping () |
Print eval result and check early stopping. | |
void | ResetBaggingConfig (const Config *config, bool is_change_dataset) |
reset config for bagging | |
virtual void | Bagging (int iter) |
Implement bagging logic. | |
data_size_t | BaggingHelper (Random &cur_rand, data_size_t start, data_size_t cnt, data_size_t *buffer) |
Helper function for bagging, used for multi-threading optimization. | |
virtual void | Boosting () |
Calculate the objective function. | |
virtual void | UpdateScore (const Tree *tree, const int cur_tree_id) |
updating score after tree was trained | |
virtual std::vector< double > | EvalOneMetric (const Metric *metric, const double *score) const |
eval results for one metric | |
std::string | OutputMetric (int iter) |
Print metric result of current iteration. | |
double | BoostFromAverage (int class_id, bool update_scorer) |
Protected Attributes | |
int | iter_ |
current iteration | |
const Dataset * | train_data_ |
Pointer to training data. | |
std::unique_ptr< Config > | config_ |
Config of gbdt. | |
std::unique_ptr< TreeLearner > | tree_learner_ |
Tree learner, will use this class to learn trees. | |
const ObjectiveFunction * | objective_function_ |
Objective function. | |
std::unique_ptr< ScoreUpdater > | train_score_updater_ |
Store and update training data's score. | |
std::vector< const Metric * > | training_metrics_ |
Metrics for training data. | |
std::vector< std::unique_ptr< ScoreUpdater > > | valid_score_updater_ |
Store and update validation data's scores. | |
std::vector< std::vector< const Metric * > > | valid_metrics_ |
Metric for validation data. | |
int | early_stopping_round_ |
Number of rounds for early stopping. | |
std::vector< std::vector< int > > | best_iter_ |
Best iteration(s) for early stopping. | |
std::vector< std::vector< double > > | best_score_ |
Best score(s) for early stopping. | |
std::vector< std::vector< std::string > > | best_msg_ |
output message of best iteration | |
std::vector< std::unique_ptr< Tree > > | models_ |
Trained models(trees) | |
int | max_feature_idx_ |
Max feature index of training data. | |
std::vector< score_t > | gradients_ |
First order derivative of training data. | |
std::vector< score_t > | hessians_ |
Second order derivative of training data. | |
std::vector< data_size_t > | bag_data_indices_ |
Store the indices of in-bag data. | |
data_size_t | bag_data_cnt_ |
Number of in-bag data. | |
std::vector< data_size_t > | tmp_indices_ |
Store the indices of in-bag data. | |
data_size_t | num_data_ |
Number of training data. | |
int | num_tree_per_iteration_ |
Number of trees per iteration. | |
int | num_class_ |
Number of classes. | |
data_size_t | label_idx_ |
Index of label column. | |
int | num_iteration_for_pred_ |
number of used model | |
double | shrinkage_rate_ |
Shrinkage rate for one iteration. | |
int | num_init_iteration_ |
Number of loaded initial models. | |
std::vector< std::string > | feature_names_ |
Feature names. | |
std::vector< std::string > | feature_infos_ |
int | num_threads_ |
number of threads | |
std::vector< data_size_t > | offsets_buf_ |
Buffer for multi-threading bagging. | |
std::vector< data_size_t > | left_cnts_buf_ |
Buffer for multi-threading bagging. | |
std::vector< data_size_t > | right_cnts_buf_ |
Buffer for multi-threading bagging. | |
std::vector< data_size_t > | left_write_pos_buf_ |
Buffer for multi-threading bagging. | |
std::vector< data_size_t > | right_write_pos_buf_ |
Buffer for multi-threading bagging. | |
std::unique_ptr< Dataset > | tmp_subset_ |
bool | is_use_subset_ |
std::vector< bool > | class_need_train_ |
bool | is_constant_hessian_ |
std::unique_ptr< ObjectiveFunction > | loaded_objective_ |
bool | average_output_ |
bool | need_re_bagging_ |
std::string | loaded_parameter_ |
Json | forced_splits_json_ |
Additional Inherited Members | |
![]() | |
static bool | LoadFileToBoosting (Boosting *boosting, const char *filename) |
static Boosting * | CreateBoosting (const std::string &type, const char *filename) |
Create boosting object. | |
GBDT algorithm implementation, including training, prediction, and bagging.
|
overridevirtual |
Adding a validation dataset.
valid_data | Validation dataset |
valid_metrics | Metrics for validation dataset |
Implements LightGBM::Boosting.
Reimplemented in LightGBM::RF.
|
protectedvirtual |
|
protected |
Helper function for bagging, used for multi-threading optimization.
start | start index of bagging |
cnt | count |
buffer | output buffer |
|
protectedvirtual |
Calculate the objective function.
Reimplemented in LightGBM::RF.
|
overridevirtual |
Dump model to json format string.
start_iteration | The iteration from which the model dump starts |
num_iteration | Number of iterations to dump; -1 means dump all |
Implements LightGBM::Boosting.
|
protectedvirtual |
Print eval result and check early stopping.
Reimplemented in LightGBM::DART.
|
overridevirtual |
Calculate feature importances.
num_iteration | Number of models to use for feature importance; -1 means use all |
importance_type | 0 for split, 1 for gain |
Implements LightGBM::Boosting.
|
inlineoverridevirtual |
|
inlineoverridevirtual |
Get current iteration.
Implements LightGBM::Boosting.
|
overridevirtual |
Get evaluation result at data_idx data.
Get eval result.
data_idx | 0: training data, 1: 1st validation data |
Implements LightGBM::Boosting.
|
inlineoverridevirtual |
Implements LightGBM::GBDTBase.
|
inlineoverridevirtual |
Get size of prediction at data_idx data.
data_idx | 0: training data, 1: 1st validation data |
Implements LightGBM::Boosting.
|
overridevirtual |
Get prediction result at data_idx data.
data_idx | 0: training data, 1: 1st validation data |
out_result | Used to store the prediction result; memory should be allocated before calling this function |
out_len | length of returned score |
Implements LightGBM::Boosting.
|
overridevirtual |
Get current training score.
Get training scores result.
out_len | length of returned score |
Implements LightGBM::Boosting.
Reimplemented in LightGBM::DART.
|
overridevirtual |
Initialization logic.
gbdt_config | Config for boosting |
train_data | Training data |
objective_function | Training objective function |
training_metrics | Training metrics |
Implements LightGBM::Boosting.
Reimplemented in LightGBM::GOSS, and LightGBM::RF.
|
inlineoverridevirtual |
Initial work for the prediction.
num_iteration | number of used iteration |
is_pred_contrib |
Implements LightGBM::Boosting.
|
inlineoverridevirtual |
|
overridevirtual |
Implements LightGBM::Boosting.
|
inlineoverridevirtual |
Get max feature index of this model.
Implements LightGBM::Boosting.
|
inlineoverridevirtual |
Merge model from other boosting object. Will insert to the front of current boosting object.
other |
Implements LightGBM::Boosting.
|
overridevirtual |
Translate model to if-else statement.
num_iteration | Number of iterations to translate; -1 means translate all |
Implements LightGBM::Boosting.
|
inlineoverridevirtual |
Can use early stopping for prediction or not.
Implements LightGBM::Boosting.
Reimplemented in LightGBM::RF.
|
inlineoverridevirtual |
|
inlineoverridevirtual |
|
inlineoverridevirtual |
Get number of tree per iteration.
Implements LightGBM::Boosting.
|
inlineoverridevirtual |
Get the number of predictions for one data row.
num_iteration | number of used iterations |
is_pred_leaf | True if predicting leaf index |
is_pred_contrib | True if predicting feature contribution |
Implements LightGBM::Boosting.
|
protected |
Print metric result of current iteration.
iter | Current iteration |
|
overridevirtual |
Prediction for one record, sigmoid transformation will be used if needed.
feature_values | Feature value on this record |
output | Prediction result for this record |
early_stop | Early stopping instance. If nullptr, no early stopping is applied and all models are evaluated. |
Implements LightGBM::Boosting.
|
overridevirtual |
Implements LightGBM::Boosting.
|
overridevirtual |
Feature contributions for the model's prediction of one record.
feature_values | Feature value on this record |
output | Prediction result for this record |
early_stop | Early stopping instance. If nullptr, no early stopping is applied and all models are evaluated. |
Implements LightGBM::Boosting.
|
overridevirtual |
Prediction for one record with leaf index.
feature_values | Feature value on this record |
output | Prediction result for this record |
Implements LightGBM::Boosting.
|
overridevirtual |
Implements LightGBM::Boosting.
|
overridevirtual |
Prediction for one record, not sigmoid transform.
feature_values | Feature value on this record |
output | Prediction result for this record |
early_stop | Early stopping instance. If nullptr, no early stopping is applied and all models are evaluated. |
Implements LightGBM::Boosting.
|
overridevirtual |
Implements LightGBM::Boosting.
|
overridevirtual |
Update the tree output by new training data.
Implements LightGBM::Boosting.
|
overridevirtual |
gbdt_config | Config for boosting |
Implements LightGBM::Boosting.
Reimplemented in LightGBM::GOSS, and LightGBM::RF.
|
overridevirtual |
Reset the training data.
train_data | New Training data |
objective_function | Training objective function |
training_metrics | Training metrics |
Implements LightGBM::Boosting.
Reimplemented in LightGBM::GOSS, and LightGBM::RF.
|
overridevirtual |
|
overridevirtual |
Save model to file.
start_iteration | The iteration from which saving of the model starts |
num_iterations | Number of models to save; -1 means save all |
filename | Name of the file to save to |
File to write models
Implements LightGBM::Boosting.
|
overridevirtual |
Translate model to if-else statement.
num_iteration | Number of iterations to translate; -1 means translate all |
filename | Name of the file to save to |
File to write models
Implements LightGBM::Boosting.
|
inline |
Save model to string.
start_iteration | The iteration from which saving of the model starts |
num_iterations | Number of models to save; -1 means save all |
|
overridevirtual |
Implements LightGBM::Boosting.
|
inlineoverridevirtual |
Implements LightGBM::GBDTBase.
|
inlineoverridevirtual |
Shuffle Existing Models.
Implements LightGBM::Boosting.
|
inlineoverridevirtual |
Get Type name of this boosting object.
Implements LightGBM::Boosting.
|
overridevirtual |
Perform a full training procedure.
snapshot_freq | frequency of snapshot |
model_output_path | path of model file |
Implements LightGBM::Boosting.
|
overridevirtual |
Training logic.
gradients | nullptr for using default objective, otherwise use self-defined boosting |
hessians | nullptr for using default objective, otherwise use self-defined boosting |
Implements LightGBM::Boosting.
Reimplemented in LightGBM::DART, and LightGBM::RF.
|
protectedvirtual |
updating score after tree was trained
tree | Trained tree of this iteration |
cur_tree_id | Current tree for multiclass training |