Medial Code Documentation
Loading...
Searching...
No Matches
leaf_splits.hpp
1#ifndef LIGHTGBM_TREELEARNER_LEAF_SPLITS_HPP_
2#define LIGHTGBM_TREELEARNER_LEAF_SPLITS_HPP_
3
4#include <limits>
5
6#include <LightGBM/meta.h>
7#include "data_partition.hpp"
8
9#include <vector>
10
11namespace LightGBM {
12
17public:
18 LeafSplits(data_size_t num_data)
19 :num_data_in_leaf_(num_data), num_data_(num_data),
20 data_indices_(nullptr) {
21 }
22 void ResetNumData(data_size_t num_data) {
23 num_data_ = num_data;
24 num_data_in_leaf_ = num_data;
25 }
26 ~LeafSplits() {
27 }
28
37 void Init(int leaf, const DataPartition* data_partition, double sum_gradients, double sum_hessians) {
38 leaf_index_ = leaf;
39 data_indices_ = data_partition->GetIndexOnLeaf(leaf, &num_data_in_leaf_);
40 sum_gradients_ = sum_gradients;
41 sum_hessians_ = sum_hessians;
42 min_val_ = -std::numeric_limits<double>::max();
43 max_val_ = std::numeric_limits<double>::max();
44 }
45
46 void SetValueConstraint(double min, double max) {
47 min_val_ = min;
48 max_val_ = max;
49 }
50
51
57 void Init(const score_t* gradients, const score_t* hessians) {
58 num_data_in_leaf_ = num_data_;
59 leaf_index_ = 0;
60 data_indices_ = nullptr;
61 double tmp_sum_gradients = 0.0f;
62 double tmp_sum_hessians = 0.0f;
63#pragma omp parallel for schedule(static) reduction(+:tmp_sum_gradients, tmp_sum_hessians)
64 for (data_size_t i = 0; i < num_data_in_leaf_; ++i) {
65 tmp_sum_gradients += gradients[i];
66 tmp_sum_hessians += hessians[i];
67 }
68 sum_gradients_ = tmp_sum_gradients;
69 sum_hessians_ = tmp_sum_hessians;
70 min_val_ = -std::numeric_limits<double>::max();
71 max_val_ = std::numeric_limits<double>::max();
72 }
73
81 void Init(int leaf, const DataPartition* data_partition, const score_t* gradients, const score_t* hessians) {
82 leaf_index_ = leaf;
83 data_indices_ = data_partition->GetIndexOnLeaf(leaf, &num_data_in_leaf_);
84 double tmp_sum_gradients = 0.0f;
85 double tmp_sum_hessians = 0.0f;
86#pragma omp parallel for schedule(static) reduction(+:tmp_sum_gradients, tmp_sum_hessians)
87 for (data_size_t i = 0; i < num_data_in_leaf_; ++i) {
88 data_size_t idx = data_indices_[i];
89 tmp_sum_gradients += gradients[idx];
90 tmp_sum_hessians += hessians[idx];
91 }
92 sum_gradients_ = tmp_sum_gradients;
93 sum_hessians_ = tmp_sum_hessians;
94 min_val_ = -std::numeric_limits<double>::max();
95 max_val_ = std::numeric_limits<double>::max();
96 }
97
98
104 void Init(double sum_gradients, double sum_hessians) {
105 leaf_index_ = 0;
106 sum_gradients_ = sum_gradients;
107 sum_hessians_ = sum_hessians;
108 min_val_ = -std::numeric_limits<double>::max();
109 max_val_ = std::numeric_limits<double>::max();
110 }
111
115 void Init() {
116 leaf_index_ = -1;
117 data_indices_ = nullptr;
118 num_data_in_leaf_ = 0;
119 min_val_ = -std::numeric_limits<double>::max();
120 max_val_ = std::numeric_limits<double>::max();
121 }
122
123
125 int LeafIndex() const { return leaf_index_; }
126
128 data_size_t num_data_in_leaf() const { return num_data_in_leaf_; }
129
131 double sum_gradients() const { return sum_gradients_; }
132
134 double sum_hessians() const { return sum_hessians_; }
135
136 double max_constraint() const { return max_val_; }
137
138 double min_constraint() const { return min_val_; }
139
141 const data_size_t* data_indices() const { return data_indices_; }
142
143
144private:
146 int leaf_index_;
148 data_size_t num_data_in_leaf_;
150 data_size_t num_data_;
152 double sum_gradients_;
154 double sum_hessians_;
156 const data_size_t* data_indices_;
157 double min_val_;
158 double max_val_;
159};
160
161} // namespace LightGBM
162#endif // LIGHTGBM_TREELEARNER_LEAF_SPLITS_HPP_
DataPartition is used to store the partition of data on a tree.
Definition data_partition.hpp:17
const data_size_t * GetIndexOnLeaf(int leaf, data_size_t *out_len) const
Get the data indices of one leaf.
Definition data_partition.hpp:94
used to find split candidates for a leaf
Definition leaf_splits.hpp:16
const data_size_t * data_indices() const
Get indices of data of current leaf.
Definition leaf_splits.hpp:141
void Init(int leaf, const DataPartition *data_partition, const score_t *gradients, const score_t *hessians)
Init splits on current leaf of partial data.
Definition leaf_splits.hpp:81
int LeafIndex() const
Get current leaf index.
Definition leaf_splits.hpp:125
void Init(double sum_gradients, double sum_hessians)
Init splits on current leaf, only update sum_gradients and sum_hessians.
Definition leaf_splits.hpp:104
double sum_gradients() const
Get sum of gradients of current leaf.
Definition leaf_splits.hpp:131
data_size_t num_data_in_leaf() const
Get number of data in current leaf.
Definition leaf_splits.hpp:128
void Init(int leaf, const DataPartition *data_partition, double sum_gradients, double sum_hessians)
Init split on current leaf on partial data.
Definition leaf_splits.hpp:37
void Init()
Init splits on current leaf.
Definition leaf_splits.hpp:115
void Init(const score_t *gradients, const score_t *hessians)
Init splits on current leaf, it will traverse all data to sum up the results.
Definition leaf_splits.hpp:57
double sum_hessians() const
Get sum of hessians of current leaf.
Definition leaf_splits.hpp:134
desc and descl2 fields must be written in reStructuredText format
Definition application.h:10
float score_t
Type of score, and gradients.
Definition meta.h:26
int32_t data_size_t
Type of data size, it is better to use signed type.
Definition meta.h:14